Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc             347
-rw-r--r--  compiler/optimizing/code_generator_arm64.h               17
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc            4
-rw-r--r--  compiler/optimizing/code_generator_vector_arm.cc         24
-rw-r--r--  compiler/optimizing/code_generator_vector_arm64.cc       57
-rw-r--r--  compiler/optimizing/code_generator_vector_arm_vixl.cc    24
-rw-r--r--  compiler/optimizing/code_generator_vector_mips.cc        24
-rw-r--r--  compiler/optimizing/code_generator_vector_mips64.cc      24
-rw-r--r--  compiler/optimizing/code_generator_vector_x86.cc         45
-rw-r--r--  compiler/optimizing/code_generator_vector_x86_64.cc      41
-rw-r--r--  compiler/optimizing/graph_visualizer.cc                   5
-rw-r--r--  compiler/optimizing/induction_var_range.cc               28
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc                  28
-rw-r--r--  compiler/optimizing/intrinsics_mips.cc                  410
-rw-r--r--  compiler/optimizing/intrinsics_mips64.cc                 412
-rw-r--r--  compiler/optimizing/loop_optimization.cc                 209
-rw-r--r--  compiler/optimizing/loop_optimization.h                   23
-rw-r--r--  compiler/optimizing/nodes.h                               19
-rw-r--r--  compiler/optimizing/nodes_vector.h                        80
19 files changed, 1655 insertions(+), 166 deletions(-)
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index b39a0e43fa..4629c54a17 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -16,6 +16,7 @@
#include "code_generator_arm64.h"
+#include "arch/arm64/asm_support_arm64.h"
#include "arch/arm64/instruction_set_features_arm64.h"
#include "art_method.h"
#include "code_generator_utils.h"
@@ -25,6 +26,7 @@
#include "gc/accounting/card_table.h"
#include "intrinsics.h"
#include "intrinsics_arm64.h"
+#include "linker/arm64/relative_patcher_arm64.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "offsets.h"
@@ -81,6 +83,26 @@ static constexpr int kCurrentMethodStackOffset = 0;
// generates less code/data with a small num_entries.
static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
+// Reference load (except object array loads) uses LDR Wt, [Xn, #offset], which can handle
+// offsets < 16KiB. For offsets >= 16KiB, the load must be emitted as two or more instructions.
+// For the Baker read barrier implementation using link-generated thunks we need to split
+// the offset explicitly.
+constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB;
+
+// Flags controlling the use of link-time generated thunks for Baker read barriers.
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
+
+// Some instructions have special requirements for a temporary. For example,
+// LoadClass/kBssEntry and LoadString/kBssEntry for Baker read barrier require
+// a temp that's not R0 (to avoid an extra move), and Baker read barrier field
+// loads with large offsets need a fixed register to limit the number of link-time
+// thunks we generate. For these and similar cases, we want to reserve a specific
+// register that's neither callee-save nor an argument register. We choose x15.
+inline Location FixedTempLocation() {
+ return Location::RegisterLocation(x15.GetCode());
+}
+
inline Condition ARM64Condition(IfCondition cond) {
switch (cond) {
case kCondEQ: return eq;
@@ -298,23 +320,22 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 {
constexpr bool call_saves_everything_except_r0_ip0 = (!kUseReadBarrier || kUseBakerReadBarrier);
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
- // For HLoadClass/kBssEntry/kSaveEverything, make sure we preserve the page address of
- // the entry which is in a scratch register. Make sure it's not used for saving/restoring
- // registers. Exclude the scratch register also for non-Baker read barrier for simplicity.
+ InvokeRuntimeCallingConvention calling_convention;
+ // For HLoadClass/kBssEntry/kSaveEverything, the page address of the entry is in a temp
+ // register, make sure it's not clobbered by the call or by saving/restoring registers.
DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
bool is_load_class_bss_entry =
(cls_ == instruction_) && (cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry);
- UseScratchRegisterScope temps(arm64_codegen->GetVIXLAssembler());
if (is_load_class_bss_entry) {
- // This temp is a scratch register.
DCHECK(bss_entry_temp_.IsValid());
- temps.Exclude(bss_entry_temp_);
+ DCHECK(!bss_entry_temp_.Is(calling_convention.GetRegisterAt(0)));
+ DCHECK(
+ !UseScratchRegisterScope(arm64_codegen->GetVIXLAssembler()).IsAvailable(bss_entry_temp_));
}
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
- InvokeRuntimeCallingConvention calling_convention;
dex::TypeIndex type_index = cls_->GetTypeIndex();
__ Mov(calling_convention.GetRegisterAt(0).W(), type_index.index_);
QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
@@ -387,14 +408,15 @@ class LoadStringSlowPathARM64 : public SlowPathCodeARM64 {
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
- // temp_ is a scratch register. Make sure it's not used for saving/restoring registers.
- UseScratchRegisterScope temps(arm64_codegen->GetVIXLAssembler());
- temps.Exclude(temp_);
+ InvokeRuntimeCallingConvention calling_convention;
+ // Make sure `temp_` is not clobbered by the call or by saving/restoring registers.
+ DCHECK(temp_.IsValid());
+ DCHECK(!temp_.Is(calling_convention.GetRegisterAt(0)));
+ DCHECK(!UseScratchRegisterScope(arm64_codegen->GetVIXLAssembler()).IsAvailable(temp_));
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
- InvokeRuntimeCallingConvention calling_convention;
const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
__ Mov(calling_convention.GetRegisterAt(0).W(), string_index.index_);
arm64_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
@@ -1416,6 +1438,7 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_string_patches_(StringReferenceValueComparator(),
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_class_patches_(TypeReferenceValueComparator(),
@@ -2236,7 +2259,8 @@ void LocationsBuilderARM64::HandleBinaryOp(HBinaryOperation* instr) {
}
}
-void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction) {
+void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction,
+ const FieldInfo& field_info) {
DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
bool object_field_get_with_read_barrier =
@@ -2250,7 +2274,17 @@ void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
// We need a temporary register for the read barrier marking slow
// path in CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier.
- locations->AddTemp(Location::RequiresRegister());
+ if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+ !Runtime::Current()->UseJitCompilation() &&
+ !field_info.IsVolatile()) {
+ // If link-time thunks for the Baker read barrier are enabled, for AOT
+ // non-volatile loads we need a temporary only if the offset is too big.
+ if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
+ locations->AddTemp(FixedTempLocation());
+ }
+ } else {
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
locations->SetInAt(0, Location::RequiresRegister());
if (Primitive::IsFloatingPointType(instruction->GetType())) {
@@ -2279,7 +2313,8 @@ void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
// Object FieldGet with Baker's read barrier case.
// /* HeapReference<Object> */ out = *(base + offset)
Register base = RegisterFrom(base_loc, Primitive::kPrimNot);
- Register temp = WRegisterFrom(locations->GetTemp(0));
+ Location maybe_temp =
+ (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation();
// Note that potential implicit null checks are handled in this
// CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier call.
codegen_->GenerateFieldLoadWithBakerReadBarrier(
@@ -2287,7 +2322,7 @@ void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
out,
base,
offset,
- temp,
+ maybe_temp,
/* needs_null_check */ true,
field_info.IsVolatile());
} else {
@@ -2672,7 +2707,21 @@ void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
// We need a temporary register for the read barrier marking slow
// path in CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier.
- locations->AddTemp(Location::RequiresRegister());
+ if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+ !Runtime::Current()->UseJitCompilation() &&
+ instruction->GetIndex()->IsConstant()) {
+ // Array loads with constant index are treated as field loads.
+ // If link-time thunks for the Baker read barrier are enabled, for AOT
+ // constant index loads we need a temporary only if the offset is too big.
+ uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
+ uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
+ offset += index << Primitive::ComponentSizeShift(Primitive::kPrimNot);
+ if (offset >= kReferenceLoadMinFarOffset) {
+ locations->AddTemp(FixedTempLocation());
+ }
+ } else {
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
@@ -2708,11 +2757,25 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// Object ArrayGet with Baker's read barrier case.
- Register temp = WRegisterFrom(locations->GetTemp(0));
// Note that a potential implicit null check is handled in the
// CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
- codegen_->GenerateArrayLoadWithBakerReadBarrier(
- instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ true);
+ if (index.IsConstant()) {
+ // Array load with a constant index can be treated as a field load.
+ offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
+ Location maybe_temp =
+ (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation();
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+ out,
+ obj.W(),
+ offset,
+ maybe_temp,
+ /* needs_null_check */ true,
+ /* use_load_acquire */ false);
+ } else {
+ Register temp = WRegisterFrom(locations->GetTemp(0));
+ codegen_->GenerateArrayLoadWithBakerReadBarrier(
+ instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ true);
+ }
} else {
// General case.
MemOperand source = HeapOperand(obj);
@@ -2989,6 +3052,11 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
if (!index.IsConstant()) {
__ Add(temp, array, offset);
+ } else {
+ // We no longer need the `temp` here so release it as the store below may
+ // need a scratch register (if the constant index makes the offset too large)
+ // and the poisoned `source` could be using the other scratch register.
+ temps.Release(temp);
}
{
// Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
@@ -3742,7 +3810,7 @@ void CodeGeneratorARM64::GenerateNop() {
}
void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
- HandleFieldGet(instruction);
+ HandleFieldGet(instruction, instruction->GetFieldInfo());
}
void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
@@ -4544,6 +4612,11 @@ vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeDexCacheArrayPatch(
return NewPcRelativePatch(dex_file, element_offset, adrp_label, &pc_relative_dex_cache_patches_);
}
+vixl::aarch64::Label* CodeGeneratorARM64::NewBakerReadBarrierPatch(uint32_t custom_data) {
+ baker_read_barrier_patches_.emplace_back(custom_data);
+ return &baker_read_barrier_patches_.back().label;
+}
+
vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch(
const DexFile& dex_file,
uint32_t offset_or_index,
@@ -4642,7 +4715,8 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc
pc_relative_string_patches_.size() +
boot_image_type_patches_.size() +
pc_relative_type_patches_.size() +
- type_bss_entry_patches_.size();
+ type_bss_entry_patches_.size() +
+ baker_read_barrier_patches_.size();
linker_patches->reserve(size);
for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) {
linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.GetLocation(),
@@ -4676,6 +4750,10 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc
target_type.dex_file,
target_type.type_index.index_));
}
+ for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
+ linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.GetLocation(),
+ info.custom_data));
+ }
DCHECK_EQ(size, linker_patches->size());
}
@@ -4788,8 +4866,7 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
if (cls->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
if (!kUseReadBarrier || kUseBakerReadBarrier) {
// Rely on the type resolution or initialization and marking to save everything we need.
- // Note that IP0 may be clobbered by saving/restoring the live register (only one thanks
- // to the custom calling convention) or by marking, so we shall use IP1.
+ locations->AddTemp(FixedTempLocation());
RegisterSet caller_saves = RegisterSet::Empty();
InvokeRuntimeCallingConvention calling_convention;
caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
@@ -4866,11 +4943,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA
// Add ADRP with its PC-relative Class .bss entry patch.
const DexFile& dex_file = cls->GetDexFile();
dex::TypeIndex type_index = cls->GetTypeIndex();
- // We can go to slow path even with non-zero reference and in that case marking
- // can clobber IP0, so we need to use IP1 which shall be preserved.
- bss_entry_temp = ip1;
- UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
- temps.Exclude(bss_entry_temp);
+ bss_entry_temp = XRegisterFrom(cls->GetLocations()->GetTemp(0));
bss_entry_adrp_label = codegen_->NewBssEntryTypePatch(dex_file, type_index);
codegen_->EmitAdrpPlaceholder(bss_entry_adrp_label, bss_entry_temp);
// Add LDR with its PC-relative Class patch.
@@ -4977,8 +5050,7 @@ void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
if (!kUseReadBarrier || kUseBakerReadBarrier) {
// Rely on the pResolveString and marking to save everything we need.
- // Note that IP0 may be clobbered by saving/restoring the live register (only one thanks
- // to the custom calling convention) or by marking, so we shall use IP1.
+ locations->AddTemp(FixedTempLocation());
RegisterSet caller_saves = RegisterSet::Empty();
InvokeRuntimeCallingConvention calling_convention;
caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
@@ -5029,11 +5101,7 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD
const DexFile& dex_file = load->GetDexFile();
const dex::StringIndex string_index = load->GetStringIndex();
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
- // We could use IP0 as the marking shall not clobber IP0 if the reference is null and
- // that's when we need the slow path. But let's not rely on such details and use IP1.
- Register temp = ip1;
- UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
- temps.Exclude(temp);
+ Register temp = XRegisterFrom(load->GetLocations()->GetTemp(0));
vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index);
codegen_->EmitAdrpPlaceholder(adrp_label, temp);
// Add LDR with its PC-relative String patch.
@@ -5468,7 +5536,7 @@ void InstructionCodeGeneratorARM64::VisitSub(HSub* instruction) {
}
void LocationsBuilderARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
- HandleFieldGet(instruction);
+ HandleFieldGet(instruction, instruction->GetFieldInfo());
}
void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
@@ -5777,7 +5845,6 @@ void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(
Register out_reg = RegisterFrom(out, type);
if (read_barrier_option == kWithReadBarrier) {
CHECK(kEmitCompilerReadBarrier);
- Register temp_reg = RegisterFrom(maybe_temp, type);
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(out + offset)
@@ -5785,7 +5852,7 @@ void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(
out,
out_reg,
offset,
- temp_reg,
+ maybe_temp,
/* needs_null_check */ false,
/* use_load_acquire */ false);
} else {
@@ -5793,6 +5860,7 @@ void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(
// Save the value of `out` into `maybe_temp` before overwriting it
// in the following move operation, as we will need it for the
// read barrier below.
+ Register temp_reg = RegisterFrom(maybe_temp, type);
__ Mov(temp_reg, out_reg);
// /* HeapReference<Object> */ out = *(out + offset)
__ Ldr(out_reg, HeapOperand(out_reg, offset));
@@ -5820,13 +5888,12 @@ void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(
CHECK(kEmitCompilerReadBarrier);
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
- Register temp_reg = RegisterFrom(maybe_temp, type);
// /* HeapReference<Object> */ out = *(obj + offset)
codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
out,
obj_reg,
offset,
- temp_reg,
+ maybe_temp,
/* needs_null_check */ false,
/* use_load_acquire */ false);
} else {
@@ -5857,52 +5924,97 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
// Baker's read barrier are used.
- //
- // Note that we do not actually check the value of
- // `GetIsGcMarking()` to decide whether to mark the loaded GC
- // root or not. Instead, we load into `temp` the read barrier
- // mark entry point corresponding to register `root`. If `temp`
- // is null, it means that `GetIsGcMarking()` is false, and vice
- // versa.
- //
- // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
- // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
- // // Slow path.
- // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call.
- // }
-
- // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
- Register temp = lr;
- SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(
- instruction, root, /* entrypoint */ LocationFrom(temp));
- codegen_->AddSlowPath(slow_path);
-
- // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(root.reg());
- // Loading the entrypoint does not require a load acquire since it is only changed when
- // threads are suspended or running a checkpoint.
- __ Ldr(temp, MemOperand(tr, entry_point_offset));
+ if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots &&
+ !Runtime::Current()->UseJitCompilation()) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded GC root or not. Instead, we
+ // load into `temp` the read barrier mark introspection entrypoint.
+ // If `temp` is null, it means that `GetIsGcMarking()` is false, and
+ // vice versa.
+ //
+ // We use link-time generated thunks for the slow path. That thunk
+ // checks the reference and jumps to the entrypoint if needed.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkIntrospection
+ // lr = &return_address;
+ // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
+ // if (temp != nullptr) {
+ // goto gc_root_thunk<root_reg>(lr)
+ // }
+ // return_address:
- // /* GcRoot<mirror::Object> */ root = *(obj + offset)
- if (fixup_label == nullptr) {
- __ Ldr(root_reg, MemOperand(obj, offset));
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ DCHECK(temps.IsAvailable(ip0));
+ DCHECK(temps.IsAvailable(ip1));
+ temps.Exclude(ip0, ip1);
+ uint32_t custom_data =
+ linker::Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
+ vixl::aarch64::Label* cbnz_label = codegen_->NewBakerReadBarrierPatch(custom_data);
+
+ // ip1 = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
+ DCHECK_EQ(ip0.GetCode(), 16u);
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
+ __ Ldr(ip1, MemOperand(tr, entry_point_offset));
+ EmissionCheckScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
+ vixl::aarch64::Label return_address;
+ __ adr(lr, &return_address);
+ if (fixup_label != nullptr) {
+ __ Bind(fixup_label);
+ }
+ static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
+ "GC root LDR must be 2 instruction (8B) before the return address label.");
+ __ ldr(root_reg, MemOperand(obj.X(), offset));
+ __ Bind(cbnz_label);
+ __ cbnz(ip1, static_cast<int64_t>(0)); // Placeholder, patched at link-time.
+ __ Bind(&return_address);
} else {
- codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj);
+ // Note that we do not actually check the value of
+ // `GetIsGcMarking()` to decide whether to mark the loaded GC
+ // root or not. Instead, we load into `temp` the read barrier
+ // mark entry point corresponding to register `root`. If `temp`
+ // is null, it means that `GetIsGcMarking()` is false, and vice
+ // versa.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
+ // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
+ // // Slow path.
+ // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call.
+ // }
+
+ // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
+ Register temp = lr;
+ SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(
+ instruction, root, /* entrypoint */ LocationFrom(temp));
+ codegen_->AddSlowPath(slow_path);
+
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(root.reg());
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ __ Ldr(temp, MemOperand(tr, entry_point_offset));
+
+ // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+ if (fixup_label == nullptr) {
+ __ Ldr(root_reg, MemOperand(obj, offset));
+ } else {
+ codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj);
+ }
+ static_assert(
+ sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+ "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+ "have different sizes.");
+ static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::CompressedReference<mirror::Object> and int32_t "
+ "have different sizes.");
+
+ // The entrypoint is null when the GC is not marking, this prevents one load compared to
+ // checking GetIsGcMarking.
+ __ Cbnz(temp, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
}
- static_assert(
- sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
- "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
- "have different sizes.");
- static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
- "art::mirror::CompressedReference<mirror::Object> and int32_t "
- "have different sizes.");
-
- // The entrypoint is null when the GC is not marking, this prevents one load compared to
- // checking GetIsGcMarking.
- __ Cbnz(temp, slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
} else {
// GC root loaded through a slow path for read barriers other
// than Baker's.
@@ -5932,13 +6044,80 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins
Location ref,
Register obj,
uint32_t offset,
- Register temp,
+ Location maybe_temp,
bool needs_null_check,
bool use_load_acquire) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
+ if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+ !use_load_acquire &&
+ !Runtime::Current()->UseJitCompilation()) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded GC root or not. Instead, we
+ // load into `temp` the read barrier mark introspection entrypoint.
+ // If `temp` is null, it means that `GetIsGcMarking()` is false, and
+ // vice versa.
+ //
+ // We use link-time generated thunks for the slow path. That thunk checks
+ // the holder and jumps to the entrypoint if needed. If the holder is not
+ // gray, it creates a fake dependency and returns to the LDR instruction.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkIntrospection
+ // lr = &return_address;
+ // if (temp != nullptr) {
+ // goto field_thunk<holder_reg, base_reg>(lr)
+ // }
+ // not_gray_return_address:
+ // // Original reference load. If the offset is too large to fit
+ // // into LDR, we use an adjusted base register here.
+ // GcRoot<mirror::Object> root = *(obj+offset);
+ // gray_return_address:
+
+ DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
+ Register base = obj;
+ if (offset >= kReferenceLoadMinFarOffset) {
+ DCHECK(maybe_temp.IsRegister());
+ base = WRegisterFrom(maybe_temp);
+ static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
+ __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
+ offset &= (kReferenceLoadMinFarOffset - 1u);
+ }
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ DCHECK(temps.IsAvailable(ip0));
+ DCHECK(temps.IsAvailable(ip1));
+ temps.Exclude(ip0, ip1);
+ uint32_t custom_data = linker::Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(
+ base.GetCode(),
+ obj.GetCode());
+ vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
+
+ // ip1 = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
+ DCHECK_EQ(ip0.GetCode(), 16u);
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
+ __ Ldr(ip1, MemOperand(tr, entry_point_offset));
+ EmissionCheckScope guard(GetVIXLAssembler(),
+ (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
+ vixl::aarch64::Label return_address;
+ __ adr(lr, &return_address);
+ __ Bind(cbnz_label);
+ __ cbnz(ip1, static_cast<int64_t>(0)); // Placeholder, patched at link-time.
+ static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+ "Field LDR must be 1 instruction (4B) before the return address label; "
+ " 2 instructions (8B) for heap poisoning.");
+ Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
+ __ ldr(ref_reg, MemOperand(base.X(), offset));
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
+ __ Bind(&return_address);
+ return;
+ }
+
// /* HeapReference<Object> */ ref = *(obj + offset)
+ Register temp = WRegisterFrom(maybe_temp);
Location no_index = Location::NoLocation();
size_t no_scale_factor = 0u;
GenerateReferenceLoadWithBakerReadBarrier(instruction,
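[Editor's sketch, not part of the patch: a scalar model of the far-offset split that
GenerateFieldLoadWithBakerReadBarrier performs above. Offsets of 16KiB or more are split
into a power-of-two adjustment folded into the ADD plus a small remainder that fits the
LDR immediate, matching the Add/Ldr pair in the hunk.]

    #include <cassert>
    #include <cstdint>

    constexpr uint32_t kReferenceLoadMinFarOffset = 16 * 1024;  // 16 KiB, as in the patch.

    // Keep the high bits for the ADD, leave the low bits as the LDR immediate offset.
    void SplitFarOffset(uint32_t offset, uint32_t* add_immediate, uint32_t* ldr_offset) {
      static_assert((kReferenceLoadMinFarOffset & (kReferenceLoadMinFarOffset - 1u)) == 0u,
                    "Expecting a power of 2.");
      *add_immediate = offset & ~(kReferenceLoadMinFarOffset - 1u);
      *ldr_offset = offset & (kReferenceLoadMinFarOffset - 1u);
    }

    int main() {
      uint32_t add_imm = 0u;
      uint32_t ldr_off = 0u;
      SplitFarOffset(0x4010u, &add_imm, &ldr_off);  // 16KiB + 16 bytes
      assert(add_imm == 0x4000u && ldr_off == 0x10u);
      return 0;
    }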
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 869aad2942..58feea2423 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -351,7 +351,7 @@ class LocationsBuilderARM64 : public HGraphVisitor {
private:
void HandleBinaryOp(HBinaryOperation* instr);
void HandleFieldSet(HInstruction* instruction);
- void HandleFieldGet(HInstruction* instruction);
+ void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
void HandleInvoke(HInvoke* instr);
void HandleCondition(HCondition* instruction);
void HandleShift(HBinaryOperation* instr);
@@ -579,6 +579,10 @@ class CodeGeneratorARM64 : public CodeGenerator {
uint32_t element_offset,
vixl::aarch64::Label* adrp_label = nullptr);
+ // Add a new baker read barrier patch and return the label to be bound
+ // before the CBNZ instruction.
+ vixl::aarch64::Label* NewBakerReadBarrierPatch(uint32_t custom_data);
+
vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageStringLiteral(
const DexFile& dex_file,
dex::StringIndex string_index);
@@ -610,7 +614,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
Location ref,
vixl::aarch64::Register obj,
uint32_t offset,
- vixl::aarch64::Register temp,
+ Location maybe_temp,
bool needs_null_check,
bool use_load_acquire);
// Fast path implementation of ReadBarrier::Barrier for a heap
@@ -738,6 +742,13 @@ class CodeGeneratorARM64 : public CodeGenerator {
vixl::aarch64::Label* pc_insn_label;
};
+ struct BakerReadBarrierPatchInfo {
+ explicit BakerReadBarrierPatchInfo(uint32_t data) : label(), custom_data(data) { }
+
+ vixl::aarch64::Label label;
+ uint32_t custom_data;
+ };
+
vixl::aarch64::Label* NewPcRelativePatch(const DexFile& dex_file,
uint32_t offset_or_index,
vixl::aarch64::Label* adrp_label,
@@ -777,6 +788,8 @@ class CodeGeneratorARM64 : public CodeGenerator {
ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
// PC-relative type patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
+ // Baker read barrier patch info.
+ ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;
// Patches for string literals in JIT compiled code.
StringToLiteralMap jit_string_patches_;
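[Editor's sketch: NewBakerReadBarrierPatch above hands out a pointer into the patch deque.
A stand-alone model of that pattern, with a stand-in Label type instead of
vixl::aarch64::Label, shows why this is safe: deque-style containers do not invalidate
references to existing elements on emplace_back.]

    #include <cstdint>
    #include <deque>

    struct Label {};  // Stand-in for vixl::aarch64::Label.

    struct BakerReadBarrierPatchInfo {
      explicit BakerReadBarrierPatchInfo(uint32_t data) : label(), custom_data(data) {}
      Label label;
      uint32_t custom_data;
    };

    std::deque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;

    // std::deque::emplace_back never invalidates references to existing elements,
    // so the returned label pointer stays valid as more patches are recorded.
    Label* NewBakerReadBarrierPatch(uint32_t custom_data) {
      baker_read_barrier_patches_.emplace_back(custom_data);
      return &baker_read_barrier_patches_.back().label;
    }

    int main() {
      Label* first = NewBakerReadBarrierPatch(1u);
      NewBakerReadBarrierPatch(2u);
      return first == &baker_read_barrier_patches_.front().label ? 0 : 1;
    }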
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index cce412b314..b6678b03ef 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -3634,8 +3634,8 @@ void InstructionCodeGeneratorARMVIXL::VisitTypeConversion(HTypeConversion* conve
} else {
DCHECK(in.IsConstant());
DCHECK(in.GetConstant()->IsLongConstant());
- int32_t value = Int32ConstantFrom(in);
- __ Mov(OutputRegister(conversion), value);
+ int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
+ __ Mov(OutputRegister(conversion), static_cast<int32_t>(value));
}
break;
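[Editor's note: the VIXL ARM fix above matters when a long-to-int HTypeConversion has a
constant input wider than 32 bits. Int32ConstantFrom expects an int constant, while the
correct Java semantics keep only the low 32 bits of the long. A small illustration of the
conversion the fix relies on:]

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t value = INT64_C(0x1234567890);           // Long constant wider than 32 bits.
      int32_t narrowed = static_cast<int32_t>(value);  // Java long-to-int: low 32 bits only.
      assert(narrowed == 0x34567890);
      return 0;
    }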
diff --git a/compiler/optimizing/code_generator_vector_arm.cc b/compiler/optimizing/code_generator_vector_arm.cc
index e7f7b3019c..6e82123e56 100644
--- a/compiler/optimizing/code_generator_vector_arm.cc
+++ b/compiler/optimizing/code_generator_vector_arm.cc
@@ -124,6 +124,14 @@ void InstructionCodeGeneratorARM::VisitVecAdd(HVecAdd* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
+void LocationsBuilderARM::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderARM::VisitVecSub(HVecSub* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -148,6 +156,22 @@ void InstructionCodeGeneratorARM::VisitVecDiv(HVecDiv* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
+void LocationsBuilderARM::VisitVecMin(HVecMin* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecMin(HVecMin* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecMax(HVecMax* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecMax(HVecMax* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderARM::VisitVecAnd(HVecAnd* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index 0923920366..2dfccfff85 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -318,6 +318,47 @@ void InstructionCodeGeneratorARM64::VisitVecAdd(HVecAdd* instruction) {
}
}
+void LocationsBuilderARM64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister rhs = VRegisterFrom(locations->InAt(1));
+ VRegister dst = VRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ instruction->IsRounded()
+ ? __ Urhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
+ : __ Uhadd(dst.V16B(), lhs.V16B(), rhs.V16B());
+ } else {
+ instruction->IsRounded()
+ ? __ Srhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
+ : __ Shadd(dst.V16B(), lhs.V16B(), rhs.V16B());
+ }
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ if (instruction->IsUnsigned()) {
+ instruction->IsRounded()
+ ? __ Urhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
+ : __ Uhadd(dst.V8H(), lhs.V8H(), rhs.V8H());
+ } else {
+ instruction->IsRounded()
+ ? __ Srhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
+ : __ Shadd(dst.V8H(), lhs.V8H(), rhs.V8H());
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
void LocationsBuilderARM64::VisitVecSub(HVecSub* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -420,6 +461,22 @@ void InstructionCodeGeneratorARM64::VisitVecDiv(HVecDiv* instruction) {
}
}
+void LocationsBuilderARM64::VisitVecMin(HVecMin* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) {
+ LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
+}
+
+void LocationsBuilderARM64::VisitVecMax(HVecMax* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) {
+ LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
+}
+
void LocationsBuilderARM64::VisitVecAnd(HVecAnd* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
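[Editor's sketch of standard AArch64 semantics, not code from the patch: the four
halving-add flavors selected above (UHADD/URHADD unsigned, SHADD/SRHADD signed) form the
sum in a wider type, optionally round by adding 1, then shift right by one.]

    #include <cassert>
    #include <cstdint>

    // Unsigned halving add; rounded = true models URHADD, false models UHADD.
    uint8_t HalvingAddU8(uint8_t a, uint8_t b, bool rounded) {
      return static_cast<uint8_t>((static_cast<uint16_t>(a) + b + (rounded ? 1u : 0u)) >> 1);
    }

    // Signed halving add; rounded = true models SRHADD, false models SHADD.
    // The wide sum is shifted arithmetically, matching the instructions.
    int8_t HalvingAddS8(int8_t a, int8_t b, bool rounded) {
      return static_cast<int8_t>((static_cast<int16_t>(a) + b + (rounded ? 1 : 0)) >> 1);
    }

    int main() {
      assert(HalvingAddU8(255u, 255u, /*rounded=*/ false) == 255u);  // No overflow in the wide sum.
      assert(HalvingAddU8(1u, 2u, /*rounded=*/ true) == 2u);         // (1 + 2 + 1) >> 1.
      assert(HalvingAddS8(-3, 2, /*rounded=*/ false) == -1);         // (-1) >> 1 == -1.
      return 0;
    }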
diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc
index 74fa584e09..990178b31b 100644
--- a/compiler/optimizing/code_generator_vector_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc
@@ -124,6 +124,14 @@ void InstructionCodeGeneratorARMVIXL::VisitVecAdd(HVecAdd* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
+void LocationsBuilderARMVIXL::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderARMVIXL::VisitVecSub(HVecSub* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -148,6 +156,22 @@ void InstructionCodeGeneratorARMVIXL::VisitVecDiv(HVecDiv* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
+void LocationsBuilderARMVIXL::VisitVecMin(HVecMin* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecMin(HVecMin* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecMax(HVecMax* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecMax(HVecMax* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderARMVIXL::VisitVecAnd(HVecAnd* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc
index 6969abd422..8ea1ca7d90 100644
--- a/compiler/optimizing/code_generator_vector_mips.cc
+++ b/compiler/optimizing/code_generator_vector_mips.cc
@@ -124,6 +124,14 @@ void InstructionCodeGeneratorMIPS::VisitVecAdd(HVecAdd* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
+void LocationsBuilderMIPS::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderMIPS::VisitVecSub(HVecSub* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -148,6 +156,22 @@ void InstructionCodeGeneratorMIPS::VisitVecDiv(HVecDiv* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
+void LocationsBuilderMIPS::VisitVecMin(HVecMin* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecMin(HVecMin* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecMax(HVecMax* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecMax(HVecMax* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderMIPS::VisitVecAnd(HVecAnd* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc
index 87118cefa5..a484bb4774 100644
--- a/compiler/optimizing/code_generator_vector_mips64.cc
+++ b/compiler/optimizing/code_generator_vector_mips64.cc
@@ -124,6 +124,14 @@ void InstructionCodeGeneratorMIPS64::VisitVecAdd(HVecAdd* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
+void LocationsBuilderMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderMIPS64::VisitVecSub(HVecSub* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -148,6 +156,22 @@ void InstructionCodeGeneratorMIPS64::VisitVecDiv(HVecDiv* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
+void LocationsBuilderMIPS64::VisitVecMin(HVecMin* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecMin(HVecMin* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecMax(HVecMax* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecMax(HVecMax* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderMIPS64::VisitVecAnd(HVecAnd* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index 8dabb4d08f..a86d060821 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -350,6 +350,35 @@ void InstructionCodeGeneratorX86::VisitVecAdd(HVecAdd* instruction) {
}
}
+void LocationsBuilderX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+
+ DCHECK(instruction->IsRounded());
+ DCHECK(instruction->IsUnsigned());
+
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ pavgb(dst, src);
+ return;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ pavgw(dst, src);
+ return;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
void LocationsBuilderX86::VisitVecSub(HVecSub* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -448,6 +477,22 @@ void InstructionCodeGeneratorX86::VisitVecDiv(HVecDiv* instruction) {
}
}
+void LocationsBuilderX86::VisitVecMin(HVecMin* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecMin(HVecMin* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderX86::VisitVecMax(HVecMax* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderX86::VisitVecAnd(HVecAnd* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
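[Editor's note: x86 only provides the rounded unsigned average directly (SSE2 PAVGB/PAVGW),
which is why VisitVecHalvingAdd above DCHECKs IsRounded() and IsUnsigned(). A scalar model
of one PAVGB lane:]

    #include <cassert>
    #include <cstdint>

    // One lane of SSE2 PAVGB: rounded unsigned average, (a + b + 1) >> 1,
    // computed without overflow in a wider type.
    uint8_t PavgbLane(uint8_t a, uint8_t b) {
      return static_cast<uint8_t>((static_cast<uint16_t>(a) + b + 1u) >> 1);
    }

    int main() {
      assert(PavgbLane(254u, 255u) == 255u);
      assert(PavgbLane(0u, 1u) == 1u);  // Ties round up.
      return 0;
    }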
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index e95608839b..696735367e 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -343,6 +343,31 @@ void InstructionCodeGeneratorX86_64::VisitVecAdd(HVecAdd* instruction) {
}
}
+void LocationsBuilderX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ pavgb(dst, src);
+ return;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ pavgw(dst, src);
+ return;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
void LocationsBuilderX86_64::VisitVecSub(HVecSub* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -441,6 +466,22 @@ void InstructionCodeGeneratorX86_64::VisitVecDiv(HVecDiv* instruction) {
}
}
+void LocationsBuilderX86_64::VisitVecMin(HVecMin* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderX86_64::VisitVecMax(HVecMax* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderX86_64::VisitVecAnd(HVecAnd* instruction) {
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
}
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index cc3c143b15..1b2b9f80ac 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -509,6 +509,11 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
StartAttributeStream("kind") << deoptimize->GetKind();
}
+ void VisitVecHalvingAdd(HVecHalvingAdd* hadd) OVERRIDE {
+ StartAttributeStream("unsigned") << std::boolalpha << hadd->IsUnsigned() << std::noboolalpha;
+ StartAttributeStream("rounded") << std::boolalpha << hadd->IsRounded() << std::noboolalpha;
+ }
+
#if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64)
void VisitMultiplyAccumulate(HMultiplyAccumulate* instruction) OVERRIDE {
StartAttributeStream("kind") << instruction->GetOpKind();
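[Editor's note: std::boolalpha makes the new "unsigned"/"rounded" attributes above print as
true/false rather than 1/0, e.g.:]

    #include <iostream>

    int main() {
      bool rounded = true;
      std::cout << rounded << ' ' << std::boolalpha << rounded << std::noboolalpha << '\n';
      // Prints: 1 true
      return 0;
    }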
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index 1c8674d522..7c833cf70c 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -45,18 +45,6 @@ static bool IsSafeDiv(int32_t c1, int32_t c2) {
return c2 != 0 && CanLongValueFitIntoInt(static_cast<int64_t>(c1) / static_cast<int64_t>(c2));
}
-/** Returns true for 32/64-bit constant instruction. */
-static bool IsIntAndGet(HInstruction* instruction, int64_t* value) {
- if (instruction->IsIntConstant()) {
- *value = instruction->AsIntConstant()->GetValue();
- return true;
- } else if (instruction->IsLongConstant()) {
- *value = instruction->AsLongConstant()->GetValue();
- return true;
- }
- return false;
-}
-
/** Computes a * b for a,b > 0 (at least until first overflow happens). */
static int64_t SafeMul(int64_t a, int64_t b, /*out*/ bool* overflow) {
if (a > 0 && b > 0 && a > (std::numeric_limits<int64_t>::max() / b)) {
@@ -106,7 +94,7 @@ static bool IsGEZero(HInstruction* instruction) {
}
}
int64_t value = -1;
- return IsIntAndGet(instruction, &value) && value >= 0;
+ return IsInt64AndGet(instruction, &value) && value >= 0;
}
/** Hunts "under the hood" for a suitable instruction at the hint. */
@@ -149,7 +137,7 @@ static InductionVarRange::Value SimplifyMax(InductionVarRange::Value v, HInstruc
int64_t value;
if (v.instruction->IsDiv() &&
v.instruction->InputAt(0)->IsArrayLength() &&
- IsIntAndGet(v.instruction->InputAt(1), &value) && v.a_constant == value) {
+ IsInt64AndGet(v.instruction->InputAt(1), &value) && v.a_constant == value) {
return InductionVarRange::Value(v.instruction->InputAt(0), 1, v.b_constant);
}
// If a == 1, the most suitable one suffices as maximum value.
@@ -444,7 +432,7 @@ bool InductionVarRange::IsConstant(HInductionVarAnalysis::InductionInfo* info,
// any of the three requests (kExact, kAtMost, and KAtLeast).
if (info->induction_class == HInductionVarAnalysis::kInvariant &&
info->operation == HInductionVarAnalysis::kFetch) {
- if (IsIntAndGet(info->fetch, value)) {
+ if (IsInt64AndGet(info->fetch, value)) {
return true;
}
}
@@ -635,7 +623,7 @@ InductionVarRange::Value InductionVarRange::GetGeometric(HInductionVarAnalysis::
int64_t f = 0;
if (IsConstant(info->op_a, kExact, &a) &&
CanLongValueFitIntoInt(a) &&
- IsIntAndGet(info->fetch, &f) && f >= 1) {
+ IsInt64AndGet(info->fetch, &f) && f >= 1) {
// Conservative bounds on a * f^-i + b with f >= 1 can be computed without
// trip count. Other forms would require a much more elaborate evaluation.
const bool is_min_a = a >= 0 ? is_min : !is_min;
@@ -663,7 +651,7 @@ InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction,
// Unless at a constant or hint, chase the instruction a bit deeper into the HIR tree, so that
// it becomes more likely range analysis will compare the same instructions as terminal nodes.
int64_t value;
- if (IsIntAndGet(instruction, &value) && CanLongValueFitIntoInt(value)) {
+ if (IsInt64AndGet(instruction, &value) && CanLongValueFitIntoInt(value)) {
// Proper constant reveals best information.
return Value(static_cast<int32_t>(value));
} else if (instruction == chase_hint_) {
@@ -671,10 +659,10 @@ InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction,
return Value(instruction, 1, 0);
} else if (instruction->IsAdd()) {
// Incorporate suitable constants in the chased value.
- if (IsIntAndGet(instruction->InputAt(0), &value) && CanLongValueFitIntoInt(value)) {
+ if (IsInt64AndGet(instruction->InputAt(0), &value) && CanLongValueFitIntoInt(value)) {
return AddValue(Value(static_cast<int32_t>(value)),
GetFetch(instruction->InputAt(1), trip, in_body, is_min));
- } else if (IsIntAndGet(instruction->InputAt(1), &value) && CanLongValueFitIntoInt(value)) {
+ } else if (IsInt64AndGet(instruction->InputAt(1), &value) && CanLongValueFitIntoInt(value)) {
return AddValue(GetFetch(instruction->InputAt(0), trip, in_body, is_min),
Value(static_cast<int32_t>(value)));
}
@@ -1074,7 +1062,7 @@ bool InductionVarRange::GenerateLastValueGeometric(HInductionVarAnalysis::Induct
// Detect known base and trip count (always taken).
int64_t f = 0;
int64_t m = 0;
- if (IsIntAndGet(info->fetch, &f) && f >= 1 && IsConstant(trip->op_a, kExact, &m) && m >= 1) {
+ if (IsInt64AndGet(info->fetch, &f) && f >= 1 && IsConstant(trip->op_a, kExact, &m) && m >= 1) {
HInstruction* opa = nullptr;
HInstruction* opb = nullptr;
if (GenerateCode(info->op_a, nullptr, graph, block, &opa, false, false) &&
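[Editor's sketch: the deleted local IsIntAndGet is replaced by a shared IsInt64AndGet helper
(per the diffstat, nodes.h also changes in this commit). The expected shape, using a
hypothetical constant-instruction interface rather than ART's HInstruction:]

    #include <cassert>
    #include <cstdint>
    #include <optional>

    // Hypothetical stand-in for an HIR instruction that may be a constant.
    struct Instruction {
      std::optional<int32_t> int_constant;
      std::optional<int64_t> long_constant;
    };

    // Returns true for a 32/64-bit constant instruction and widens its value to int64_t.
    bool IsInt64AndGet(const Instruction* instruction, /*out*/ int64_t* value) {
      if (instruction->int_constant.has_value()) {
        *value = *instruction->int_constant;
        return true;
      }
      if (instruction->long_constant.has_value()) {
        *value = *instruction->long_constant;
        return true;
      }
      return false;
    }

    int main() {
      Instruction c{std::nullopt, INT64_C(1) << 40};
      int64_t value = 0;
      assert(IsInt64AndGet(&c, &value) && value == (INT64_C(1) << 40));
      return 0;
    }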
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 423fd3c6ae..47bcb5d000 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -2507,9 +2507,11 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
// We use a block to end the scratch scope before the write barrier, thus
// freeing the temporary registers so they can be used in `MarkGCCard`.
UseScratchRegisterScope temps(masm);
+ Location temp3_loc; // Used only for Baker read barrier.
Register temp3;
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
- temp3 = WRegisterFrom(locations->GetTemp(2));
+ temp3_loc = locations->GetTemp(2);
+ temp3 = WRegisterFrom(temp3_loc);
} else {
temp3 = temps.AcquireW();
}
@@ -2527,7 +2529,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
temp1_loc,
src.W(),
class_offset,
- temp2,
+ temp3_loc,
/* needs_null_check */ false,
/* use_load_acquire */ false);
// Bail out if the source is not a non primitive array.
@@ -2536,7 +2538,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
temp1_loc,
temp1,
component_offset,
- temp2,
+ temp3_loc,
/* needs_null_check */ false,
/* use_load_acquire */ false);
__ Cbz(temp1, intrinsic_slow_path->GetEntryLabel());
@@ -2553,7 +2555,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
temp1_loc,
dest.W(),
class_offset,
- temp2,
+ temp3_loc,
/* needs_null_check */ false,
/* use_load_acquire */ false);
@@ -2570,7 +2572,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
temp2_loc,
temp1,
component_offset,
- temp3,
+ temp3_loc,
/* needs_null_check */ false,
/* use_load_acquire */ false);
__ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
@@ -2589,7 +2591,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
temp2_loc,
src.W(),
class_offset,
- temp3,
+ temp3_loc,
/* needs_null_check */ false,
/* use_load_acquire */ false);
// Note: if heap poisoning is on, we are comparing two unpoisoned references here.
@@ -2603,7 +2605,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
temp1_loc,
temp1,
component_offset,
- temp2,
+ temp3_loc,
/* needs_null_check */ false,
/* use_load_acquire */ false);
// /* HeapReference<Class> */ temp1 = temp1->super_class_
@@ -2687,7 +2689,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
temp1_loc,
src.W(),
class_offset,
- temp2,
+ temp3_loc,
/* needs_null_check */ false,
/* use_load_acquire */ false);
// /* HeapReference<Class> */ temp2 = temp1->component_type_
@@ -2695,7 +2697,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
temp2_loc,
temp1,
component_offset,
- temp3,
+ temp3_loc,
/* needs_null_check */ false,
/* use_load_acquire */ false);
__ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
@@ -2755,9 +2757,17 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
// Make sure `tmp` is not IP0, as it is clobbered by
// ReadBarrierMarkRegX entry points in
// ReadBarrierSystemArrayCopySlowPathARM64.
+ DCHECK(temps.IsAvailable(ip0));
temps.Exclude(ip0);
Register tmp = temps.AcquireW();
DCHECK_NE(LocationFrom(tmp).reg(), IP0);
+ // Put IP0 back in the pool so that VIXL has at least one
+ // scratch register available to emit macro-instructions (note
+ // that IP1 is already used for `tmp`). Indeed some
+ // macro-instructions used in GenSystemArrayCopyAddresses
+ // (invoked hereunder) may require a scratch register (for
+ // instance to emit a load with a large constant offset).
+ temps.Include(ip0);
// /* int32_t */ monitor = src->monitor_
__ Ldr(tmp, HeapOperand(src.W(), monitor_offset));
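[Editor's sketch: the Exclude/AcquireW/Include dance above first keeps IP0 out of the
scratch pool so the acquired register cannot alias one clobbered by the ReadBarrierMarkRegX
entry points, then returns IP0 so later macro-instructions still have a scratch register.
A minimal bookkeeping model of the pool semantics relied on; this is not VIXL's
implementation:]

    #include <cassert>
    #include <set>

    struct ScratchPool {
      std::set<int> available;
      void Exclude(int reg) { available.erase(reg); }
      void Include(int reg) { available.insert(reg); }
      int Acquire() {
        assert(!available.empty());
        int reg = *available.begin();
        available.erase(available.begin());
        return reg;
      }
    };

    int main() {
      ScratchPool temps{{16, 17}};  // x16 = IP0, x17 = IP1.
      temps.Exclude(16);            // Keep IP0 away from Acquire(): mark entry points clobber it.
      int tmp = temps.Acquire();    // Hands out IP1.
      temps.Include(16);            // IP0 back in the pool for later macro-instructions.
      assert(tmp == 17);
      return 0;
    }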
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 900b00e222..41df56b514 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -2742,6 +2742,397 @@ void IntrinsicCodeGeneratorMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) {
__ Bind(&done);
}
+static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kCallOnMainOnly,
+ kIntrinsified);
+ InvokeRuntimeCallingConvention calling_convention;
+
+ locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+ locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimDouble));
+}
+
+static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kCallOnMainOnly,
+ kIntrinsified);
+ InvokeRuntimeCallingConvention calling_convention;
+
+ locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+ locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
+ locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimDouble));
+}
+
+static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorMIPS* codegen, QuickEntrypointEnum entry) {
+ LocationSummary* locations = invoke->GetLocations();
+ FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
+ DCHECK_EQ(in, F12);
+ FRegister out = locations->Out().AsFpuRegister<FRegister>();
+ DCHECK_EQ(out, F0);
+
+ codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
+}
+
+static void GenFPFPToFPCall(HInvoke* invoke,
+ CodeGeneratorMIPS* codegen,
+ QuickEntrypointEnum entry) {
+ LocationSummary* locations = invoke->GetLocations();
+ FRegister in0 = locations->InAt(0).AsFpuRegister<FRegister>();
+ DCHECK_EQ(in0, F12);
+ FRegister in1 = locations->InAt(1).AsFpuRegister<FRegister>();
+ DCHECK_EQ(in1, F14);
+ FRegister out = locations->Out().AsFpuRegister<FRegister>();
+ DCHECK_EQ(out, F0);
+
+ codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
+}
+
+// static double java.lang.Math.cos(double a)
+void IntrinsicLocationsBuilderMIPS::VisitMathCos(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathCos(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickCos);
+}
+
+// static double java.lang.Math.sin(double a)
+void IntrinsicLocationsBuilderMIPS::VisitMathSin(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathSin(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickSin);
+}
+
+// static double java.lang.Math.acos(double a)
+void IntrinsicLocationsBuilderMIPS::VisitMathAcos(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathAcos(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickAcos);
+}
+
+// static double java.lang.Math.asin(double a)
+void IntrinsicLocationsBuilderMIPS::VisitMathAsin(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathAsin(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickAsin);
+}
+
+// static double java.lang.Math.atan(double a)
+void IntrinsicLocationsBuilderMIPS::VisitMathAtan(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathAtan(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickAtan);
+}
+
+// static double java.lang.Math.atan2(double y, double x)
+void IntrinsicLocationsBuilderMIPS::VisitMathAtan2(HInvoke* invoke) {
+ CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathAtan2(HInvoke* invoke) {
+ GenFPFPToFPCall(invoke, codegen_, kQuickAtan2);
+}
+
+// static double java.lang.Math.cbrt(double a)
+void IntrinsicLocationsBuilderMIPS::VisitMathCbrt(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathCbrt(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickCbrt);
+}
+
+// static double java.lang.Math.cosh(double x)
+void IntrinsicLocationsBuilderMIPS::VisitMathCosh(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathCosh(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickCosh);
+}
+
+// static double java.lang.Math.exp(double a)
+void IntrinsicLocationsBuilderMIPS::VisitMathExp(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathExp(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickExp);
+}
+
+// static double java.lang.Math.expm1(double x)
+void IntrinsicLocationsBuilderMIPS::VisitMathExpm1(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathExpm1(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickExpm1);
+}
+
+// static double java.lang.Math.hypot(double x, double y)
+void IntrinsicLocationsBuilderMIPS::VisitMathHypot(HInvoke* invoke) {
+ CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathHypot(HInvoke* invoke) {
+ GenFPFPToFPCall(invoke, codegen_, kQuickHypot);
+}
+
+// static double java.lang.Math.log(double a)
+void IntrinsicLocationsBuilderMIPS::VisitMathLog(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathLog(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickLog);
+}
+
+// static double java.lang.Math.log10(double x)
+void IntrinsicLocationsBuilderMIPS::VisitMathLog10(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathLog10(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickLog10);
+}
+
+// static double java.lang.Math.nextAfter(double start, double direction)
+void IntrinsicLocationsBuilderMIPS::VisitMathNextAfter(HInvoke* invoke) {
+ CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathNextAfter(HInvoke* invoke) {
+ GenFPFPToFPCall(invoke, codegen_, kQuickNextAfter);
+}
+
+// static double java.lang.Math.sinh(double x)
+void IntrinsicLocationsBuilderMIPS::VisitMathSinh(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathSinh(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickSinh);
+}
+
+// static double java.lang.Math.tan(double a)
+void IntrinsicLocationsBuilderMIPS::VisitMathTan(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathTan(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickTan);
+}
+
+// static double java.lang.Math.tanh(double x)
+void IntrinsicLocationsBuilderMIPS::VisitMathTanh(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathTanh(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickTanh);
+}
+
+// static void java.lang.System.arraycopy(Object src, int srcPos,
+// Object dest, int destPos,
+// int length)
+void IntrinsicLocationsBuilderMIPS::VisitSystemArrayCopyChar(HInvoke* invoke) {
+ HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
+ HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
+ HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
+
+ // As long as we are checking, we might as well check to see if the src and dest
+ // positions are >= 0.
+ if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
+ (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
+ // We will have to fail anyway.
+ return;
+ }
+
+ // And since we are already checking, check the length too.
+ if (length != nullptr) {
+ int32_t len = length->GetValue();
+ if (len < 0) {
+ // Just call as normal.
+ return;
+ }
+ }
+
+ // Okay, it is safe to generate inline code.
+ LocationSummary* locations =
+ new (arena_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
+ // arraycopy(Object src, int srcPos, Object dest, int destPos, int length).
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
+ locations->SetInAt(2, Location::RequiresRegister());
+ locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
+ locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));
+
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+// Utility routine to verify that "length(input) - pos >= length"
+static void EnoughItems(MipsAssembler* assembler,
+ Register length_input_minus_pos,
+ Location length,
+ SlowPathCodeMIPS* slow_path) {
+ if (length.IsConstant()) {
+ int32_t length_constant = length.GetConstant()->AsIntConstant()->GetValue();
+
+ if (IsInt<16>(length_constant)) {
+ __ Slti(TMP, length_input_minus_pos, length_constant);
+ __ Bnez(TMP, slow_path->GetEntryLabel());
+ } else {
+ __ LoadConst32(TMP, length_constant);
+ __ Blt(length_input_minus_pos, TMP, slow_path->GetEntryLabel());
+ }
+ } else {
+ __ Blt(length_input_minus_pos, length.AsRegister<Register>(), slow_path->GetEntryLabel());
+ }
+}
+
+static void CheckPosition(MipsAssembler* assembler,
+ Location pos,
+ Register input,
+ Location length,
+ SlowPathCodeMIPS* slow_path,
+ bool length_is_input_length = false) {
+ // Where is the length in the Array?
+ const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
+
+ // Calculate length(input) - pos.
+ if (pos.IsConstant()) {
+ int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
+ if (pos_const == 0) {
+ if (!length_is_input_length) {
+ // Check that length(input) >= length.
+ __ LoadFromOffset(kLoadWord, AT, input, length_offset);
+ EnoughItems(assembler, AT, length, slow_path);
+ }
+ } else {
+ // Check that (length(input) - pos) >= zero.
+ __ LoadFromOffset(kLoadWord, AT, input, length_offset);
+ DCHECK_GT(pos_const, 0);
+ __ Addiu32(AT, AT, -pos_const, TMP);
+ __ Bltz(AT, slow_path->GetEntryLabel());
+
+ // Verify that (length(input) - pos) >= length.
+ EnoughItems(assembler, AT, length, slow_path);
+ }
+ } else if (length_is_input_length) {
+ // The only way the copy can succeed is if pos is zero.
+ Register pos_reg = pos.AsRegister<Register>();
+ __ Bnez(pos_reg, slow_path->GetEntryLabel());
+ } else {
+ // Verify that pos >= 0.
+ Register pos_reg = pos.AsRegister<Register>();
+ __ Bltz(pos_reg, slow_path->GetEntryLabel());
+
+ // Check that (length(input) - pos) >= zero.
+ __ LoadFromOffset(kLoadWord, AT, input, length_offset);
+ __ Subu(AT, AT, pos_reg);
+ __ Bltz(AT, slow_path->GetEntryLabel());
+
+ // Verify that (length(input) - pos) >= length.
+ EnoughItems(assembler, AT, length, slow_path);
+ }
+}
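Taken together, CheckPosition and EnoughItems enforce the usual System.arraycopy range conditions before any element is touched. In scalar form (hypothetical helper, for illustration only):

#include <cstdint>

// Scalar form of the checks emitted above; any false result corresponds to a
// branch to the intrinsic slow path.
bool PositionOk(int32_t input_length, int32_t pos, int32_t length) {
  if (pos < 0) return false;                 // pos must be non-negative
  if (input_length - pos < 0) return false;  // pos must not exceed the array length
  return input_length - pos >= length;       // enough elements remain from pos
}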
+
+void IntrinsicCodeGeneratorMIPS::VisitSystemArrayCopyChar(HInvoke* invoke) {
+ MipsAssembler* assembler = GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+
+ Register src = locations->InAt(0).AsRegister<Register>();
+ Location src_pos = locations->InAt(1);
+ Register dest = locations->InAt(2).AsRegister<Register>();
+ Location dest_pos = locations->InAt(3);
+ Location length = locations->InAt(4);
+
+ MipsLabel loop;
+
+ Register dest_base = locations->GetTemp(0).AsRegister<Register>();
+ Register src_base = locations->GetTemp(1).AsRegister<Register>();
+ Register count = locations->GetTemp(2).AsRegister<Register>();
+
+ SlowPathCodeMIPS* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS(invoke);
+ codegen_->AddSlowPath(slow_path);
+
+ // Bail out if the source and destination are the same (to handle overlap).
+ __ Beq(src, dest, slow_path->GetEntryLabel());
+
+ // Bail out if the source is null.
+ __ Beqz(src, slow_path->GetEntryLabel());
+
+ // Bail out if the destination is null.
+ __ Beqz(dest, slow_path->GetEntryLabel());
+
+ // Load length into register for count.
+ if (length.IsConstant()) {
+ __ LoadConst32(count, length.GetConstant()->AsIntConstant()->GetValue());
+ } else {
+ // If the length is negative, bail out.
+ // We have already checked in the LocationsBuilder for the constant case.
+ __ Bltz(length.AsRegister<Register>(), slow_path->GetEntryLabel());
+
+ __ Move(count, length.AsRegister<Register>());
+ }
+
+ // Validity checks: source.
+ CheckPosition(assembler, src_pos, src, Location::RegisterLocation(count), slow_path);
+
+ // Validity checks: dest.
+ CheckPosition(assembler, dest_pos, dest, Location::RegisterLocation(count), slow_path);
+
+ // If count is zero, we're done.
+ __ Beqz(count, slow_path->GetExitLabel());
+
+ // Okay, everything checks out. Finally time to do the copy.
+ // Check assumption that sizeof(Char) is 2 (used in scaling below).
+ const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
+ DCHECK_EQ(char_size, 2u);
+
+ const size_t char_shift = Primitive::ComponentSizeShift(Primitive::kPrimChar);
+
+ const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
+
+ // Calculate source and destination addresses.
+ if (src_pos.IsConstant()) {
+ int32_t src_pos_const = src_pos.GetConstant()->AsIntConstant()->GetValue();
+
+ __ Addiu32(src_base, src, data_offset + char_size * src_pos_const, TMP);
+ } else {
+ __ Addiu32(src_base, src, data_offset, TMP);
+ __ ShiftAndAdd(src_base, src_pos.AsRegister<Register>(), src_base, char_shift);
+ }
+ if (dest_pos.IsConstant()) {
+ int32_t dest_pos_const = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+
+ __ Addiu32(dest_base, dest, data_offset + char_size * dest_pos_const, TMP);
+ } else {
+ __ Addiu32(dest_base, dest, data_offset, TMP);
+ __ ShiftAndAdd(dest_base, dest_pos.AsRegister<Register>(), dest_base, char_shift);
+ }
+
+ __ Bind(&loop);
+ __ Lh(TMP, src_base, 0);
+ __ Addiu(src_base, src_base, char_size);
+ __ Addiu(count, count, -1);
+ __ Sh(TMP, dest_base, 0);
+ __ Addiu(dest_base, dest_base, char_size);
+ __ Bnez(count, &loop);
+
+ __ Bind(slow_path->GetExitLabel());
+}
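For reference, the emitted loop is a straightforward post-incrementing halfword copy; its scalar equivalent is sketched below (the earlier Beqz guarantees a non-zero count at loop entry, hence the do-while; the helper name is illustrative):

#include <cstdint>

// Scalar equivalent of the copy loop above: Lh/Sh move one char (2 bytes) per
// iteration while count is decremented toward zero.
void CopyChars(const uint16_t* src_base, uint16_t* dest_base, int32_t count) {
  do {
    *dest_base++ = *src_base++;
  } while (--count != 0);
}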
+
// Unimplemented intrinsics.
UNIMPLEMENTED_INTRINSIC(MIPS, MathCeil)
@@ -2753,27 +3144,8 @@ UNIMPLEMENTED_INTRINSIC(MIPS, UnsafePutLongVolatile);
UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeCASLong)
UNIMPLEMENTED_INTRINSIC(MIPS, ReferenceGetReferent)
-UNIMPLEMENTED_INTRINSIC(MIPS, SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(MIPS, SystemArrayCopy)
-UNIMPLEMENTED_INTRINSIC(MIPS, MathCos)
-UNIMPLEMENTED_INTRINSIC(MIPS, MathSin)
-UNIMPLEMENTED_INTRINSIC(MIPS, MathAcos)
-UNIMPLEMENTED_INTRINSIC(MIPS, MathAsin)
-UNIMPLEMENTED_INTRINSIC(MIPS, MathAtan)
-UNIMPLEMENTED_INTRINSIC(MIPS, MathAtan2)
-UNIMPLEMENTED_INTRINSIC(MIPS, MathCbrt)
-UNIMPLEMENTED_INTRINSIC(MIPS, MathCosh)
-UNIMPLEMENTED_INTRINSIC(MIPS, MathExp)
-UNIMPLEMENTED_INTRINSIC(MIPS, MathExpm1)
-UNIMPLEMENTED_INTRINSIC(MIPS, MathHypot)
-UNIMPLEMENTED_INTRINSIC(MIPS, MathLog)
-UNIMPLEMENTED_INTRINSIC(MIPS, MathLog10)
-UNIMPLEMENTED_INTRINSIC(MIPS, MathNextAfter)
-UNIMPLEMENTED_INTRINSIC(MIPS, MathSinh)
-UNIMPLEMENTED_INTRINSIC(MIPS, MathTan)
-UNIMPLEMENTED_INTRINSIC(MIPS, MathTanh)
-
UNIMPLEMENTED_INTRINSIC(MIPS, StringStringIndexOf);
UNIMPLEMENTED_INTRINSIC(MIPS, StringStringIndexOfAfter);
UNIMPLEMENTED_INTRINSIC(MIPS, StringBufferAppend);
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index c2518a7861..b57b41f686 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -2093,6 +2093,199 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
__ Bind(&done);
}
+// static void java.lang.System.arraycopy(Object src, int srcPos,
+// Object dest, int destPos,
+// int length)
+void IntrinsicLocationsBuilderMIPS64::VisitSystemArrayCopyChar(HInvoke* invoke) {
+ HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
+ HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
+ HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
+
+ // As long as we are checking, we might as well check to see if the src and dest
+ // positions are >= 0.
+ if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
+ (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
+ // We will have to fail anyway.
+ return;
+ }
+
+ // And since we are already checking, check the length too.
+ if (length != nullptr) {
+ int32_t len = length->GetValue();
+ if (len < 0) {
+ // Just call as normal.
+ return;
+ }
+ }
+
+ // Okay, it is safe to generate inline code.
+ LocationSummary* locations =
+ new (arena_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
+ // arraycopy(Object src, int srcPos, Object dest, int destPos, int length).
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
+ locations->SetInAt(2, Location::RequiresRegister());
+ locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
+ locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));
+
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+// Utility routine to verify that "length(input) - pos >= length"
+static void EnoughItems(Mips64Assembler* assembler,
+ GpuRegister length_input_minus_pos,
+ Location length,
+ SlowPathCodeMIPS64* slow_path) {
+ if (length.IsConstant()) {
+ int32_t length_constant = length.GetConstant()->AsIntConstant()->GetValue();
+
+ if (IsInt<16>(length_constant)) {
+ __ Slti(TMP, length_input_minus_pos, length_constant);
+ __ Bnezc(TMP, slow_path->GetEntryLabel());
+ } else {
+ __ LoadConst32(TMP, length_constant);
+ __ Bltc(length_input_minus_pos, TMP, slow_path->GetEntryLabel());
+ }
+ } else {
+ __ Bltc(length_input_minus_pos, length.AsRegister<GpuRegister>(), slow_path->GetEntryLabel());
+ }
+}
+
+static void CheckPosition(Mips64Assembler* assembler,
+ Location pos,
+ GpuRegister input,
+ Location length,
+ SlowPathCodeMIPS64* slow_path,
+ bool length_is_input_length = false) {
+ // Where is the length in the Array?
+ const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
+
+ // Calculate length(input) - pos.
+ if (pos.IsConstant()) {
+ int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
+ if (pos_const == 0) {
+ if (!length_is_input_length) {
+ // Check that length(input) >= length.
+ __ LoadFromOffset(kLoadWord, AT, input, length_offset);
+ EnoughItems(assembler, AT, length, slow_path);
+ }
+ } else {
+ // Check that (length(input) - pos) >= zero.
+ __ LoadFromOffset(kLoadWord, AT, input, length_offset);
+ DCHECK_GT(pos_const, 0);
+ __ Addiu32(AT, AT, -pos_const);
+ __ Bltzc(AT, slow_path->GetEntryLabel());
+
+ // Verify that (length(input) - pos) >= length.
+ EnoughItems(assembler, AT, length, slow_path);
+ }
+ } else if (length_is_input_length) {
+ // The only way the copy can succeed is if pos is zero.
+ GpuRegister pos_reg = pos.AsRegister<GpuRegister>();
+ __ Bnezc(pos_reg, slow_path->GetEntryLabel());
+ } else {
+ // Verify that pos >= 0.
+ GpuRegister pos_reg = pos.AsRegister<GpuRegister>();
+ __ Bltzc(pos_reg, slow_path->GetEntryLabel());
+
+ // Check that (length(input) - pos) >= zero.
+ __ LoadFromOffset(kLoadWord, AT, input, length_offset);
+ __ Subu(AT, AT, pos_reg);
+ __ Bltzc(AT, slow_path->GetEntryLabel());
+
+ // Verify that (length(input) - pos) >= length.
+ EnoughItems(assembler, AT, length, slow_path);
+ }
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitSystemArrayCopyChar(HInvoke* invoke) {
+ Mips64Assembler* assembler = GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+
+ GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>();
+ Location src_pos = locations->InAt(1);
+ GpuRegister dest = locations->InAt(2).AsRegister<GpuRegister>();
+ Location dest_pos = locations->InAt(3);
+ Location length = locations->InAt(4);
+
+ Mips64Label loop;
+
+ GpuRegister dest_base = locations->GetTemp(0).AsRegister<GpuRegister>();
+ GpuRegister src_base = locations->GetTemp(1).AsRegister<GpuRegister>();
+ GpuRegister count = locations->GetTemp(2).AsRegister<GpuRegister>();
+
+ SlowPathCodeMIPS64* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS64(invoke);
+ codegen_->AddSlowPath(slow_path);
+
+ // Bail out if the source and destination are the same (to handle overlap).
+ __ Beqc(src, dest, slow_path->GetEntryLabel());
+
+ // Bail out if the source is null.
+ __ Beqzc(src, slow_path->GetEntryLabel());
+
+ // Bail out if the destination is null.
+ __ Beqzc(dest, slow_path->GetEntryLabel());
+
+ // Load length into register for count.
+ if (length.IsConstant()) {
+ __ LoadConst32(count, length.GetConstant()->AsIntConstant()->GetValue());
+ } else {
+ // If the length is negative, bail out.
+ // We have already checked in the LocationsBuilder for the constant case.
+ __ Bltzc(length.AsRegister<GpuRegister>(), slow_path->GetEntryLabel());
+
+ __ Move(count, length.AsRegister<GpuRegister>());
+ }
+
+ // Validity checks: source.
+ CheckPosition(assembler, src_pos, src, Location::RegisterLocation(count), slow_path);
+
+ // Validity checks: dest.
+ CheckPosition(assembler, dest_pos, dest, Location::RegisterLocation(count), slow_path);
+
+ // If count is zero, we're done.
+ __ Beqzc(count, slow_path->GetExitLabel());
+
+ // Okay, everything checks out. Finally time to do the copy.
+ // Check assumption that sizeof(Char) is 2 (used in scaling below).
+ const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
+ DCHECK_EQ(char_size, 2u);
+
+ const size_t char_shift = Primitive::ComponentSizeShift(Primitive::kPrimChar);
+
+ const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
+
+ // Calculate source and destination addresses.
+ if (src_pos.IsConstant()) {
+ int32_t src_pos_const = src_pos.GetConstant()->AsIntConstant()->GetValue();
+
+ __ Daddiu64(src_base, src, data_offset + char_size * src_pos_const, TMP);
+ } else {
+ __ Daddiu64(src_base, src, data_offset, TMP);
+ __ Dlsa(src_base, src_pos.AsRegister<GpuRegister>(), src_base, char_shift);
+ }
+ if (dest_pos.IsConstant()) {
+ int32_t dest_pos_const = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+
+ __ Daddiu64(dest_base, dest, data_offset + char_size * dest_pos_const, TMP);
+ } else {
+ __ Daddiu64(dest_base, dest, data_offset, TMP);
+ __ Dlsa(dest_base, dest_pos.AsRegister<GpuRegister>(), dest_base, char_shift);
+ }
+
+ __ Bind(&loop);
+ __ Lh(TMP, src_base, 0);
+ __ Daddiu(src_base, src_base, char_size);
+ __ Daddiu(count, count, -1);
+ __ Sh(TMP, dest_base, 0);
+ __ Daddiu(dest_base, dest_base, char_size);
+ __ Bnezc(count, &loop);
+
+ __ Bind(slow_path->GetExitLabel());
+}
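The 64-bit variant differs from the MIPS32 code mainly in address arithmetic: MIPS64R6's Dlsa folds the scale-and-add into one instruction where the 32-bit path uses ShiftAndAdd. The address both paths compute, as a sketch (helper name is illustrative):

#include <cstdint>

// Base address computed above for a copy starting at `pos`: skip the array
// header (data_offset), then scale the element index by the char size.
uintptr_t ElementAddress(uintptr_t array, uint32_t data_offset,
                         uint32_t pos, int char_shift) {
  return array + data_offset + (static_cast<uintptr_t>(pos) << char_shift);
}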
+
static void GenHighestOneBit(LocationSummary* locations,
Primitive::Type type,
Mips64Assembler* assembler) {
@@ -2171,28 +2364,209 @@ void IntrinsicCodeGeneratorMIPS64::VisitLongLowestOneBit(HInvoke* invoke) {
GenLowestOneBit(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}
+static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kCallOnMainOnly,
+ kIntrinsified);
+ InvokeRuntimeCallingConvention calling_convention;
+
+ locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+ locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimDouble));
+}
+
+static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kCallOnMainOnly,
+ kIntrinsified);
+ InvokeRuntimeCallingConvention calling_convention;
+
+ locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+ locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
+ locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimDouble));
+}
+
+static void GenFPToFPCall(HInvoke* invoke,
+ CodeGeneratorMIPS64* codegen,
+ QuickEntrypointEnum entry) {
+ LocationSummary* locations = invoke->GetLocations();
+ FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>();
+ DCHECK_EQ(in, F12);
+ FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+ DCHECK_EQ(out, F0);
+
+ codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
+}
+
+static void GenFPFPToFPCall(HInvoke* invoke,
+ CodeGeneratorMIPS64* codegen,
+ QuickEntrypointEnum entry) {
+ LocationSummary* locations = invoke->GetLocations();
+ FpuRegister in0 = locations->InAt(0).AsFpuRegister<FpuRegister>();
+ DCHECK_EQ(in0, F12);
+ FpuRegister in1 = locations->InAt(1).AsFpuRegister<FpuRegister>();
+ DCHECK_EQ(in1, F13);
+ FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+ DCHECK_EQ(out, F0);
+
+ codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
+}
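Note the ABI difference from the 32-bit file: n64 assigns floating-point arguments to consecutive registers, so the second double lands in F13 here, whereas the O32 code above DCHECKs F14. A sketch of the assumed register assignment (constants are illustrative, not ART definitions):

// Register assignment assumed by the two GenFPFPToFPCall variants (sketch):
constexpr int kFpResult = 0;    // F0: double result on both ABIs
constexpr int kFpArg0 = 12;     // F12: first double argument on both ABIs
constexpr int kFpArg1O32 = 14;  // F14: second argument under O32 (intrinsics_mips.cc)
constexpr int kFpArg1N64 = 13;  // F13: second argument under n64 (this file)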
+
+// static double java.lang.Math.cos(double a)
+void IntrinsicLocationsBuilderMIPS64::VisitMathCos(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathCos(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickCos);
+}
+
+// static double java.lang.Math.sin(double a)
+void IntrinsicLocationsBuilderMIPS64::VisitMathSin(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathSin(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickSin);
+}
+
+// static double java.lang.Math.acos(double a)
+void IntrinsicLocationsBuilderMIPS64::VisitMathAcos(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathAcos(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickAcos);
+}
+
+// static double java.lang.Math.asin(double a)
+void IntrinsicLocationsBuilderMIPS64::VisitMathAsin(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathAsin(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickAsin);
+}
+
+// static double java.lang.Math.atan(double a)
+void IntrinsicLocationsBuilderMIPS64::VisitMathAtan(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathAtan(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickAtan);
+}
+
+// static double java.lang.Math.atan2(double y, double x)
+void IntrinsicLocationsBuilderMIPS64::VisitMathAtan2(HInvoke* invoke) {
+ CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathAtan2(HInvoke* invoke) {
+ GenFPFPToFPCall(invoke, codegen_, kQuickAtan2);
+}
+
+// static double java.lang.Math.cbrt(double a)
+void IntrinsicLocationsBuilderMIPS64::VisitMathCbrt(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathCbrt(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickCbrt);
+}
+
+// static double java.lang.Math.cosh(double x)
+void IntrinsicLocationsBuilderMIPS64::VisitMathCosh(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathCosh(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickCosh);
+}
+
+// static double java.lang.Math.exp(double a)
+void IntrinsicLocationsBuilderMIPS64::VisitMathExp(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathExp(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickExp);
+}
+
+// static double java.lang.Math.expm1(double x)
+void IntrinsicLocationsBuilderMIPS64::VisitMathExpm1(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathExpm1(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickExpm1);
+}
+
+// static double java.lang.Math.hypot(double x, double y)
+void IntrinsicLocationsBuilderMIPS64::VisitMathHypot(HInvoke* invoke) {
+ CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathHypot(HInvoke* invoke) {
+ GenFPFPToFPCall(invoke, codegen_, kQuickHypot);
+}
+
+// static double java.lang.Math.log(double a)
+void IntrinsicLocationsBuilderMIPS64::VisitMathLog(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathLog(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickLog);
+}
+
+// static double java.lang.Math.log10(double x)
+void IntrinsicLocationsBuilderMIPS64::VisitMathLog10(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathLog10(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickLog10);
+}
+
+// static double java.lang.Math.nextAfter(double start, double direction)
+void IntrinsicLocationsBuilderMIPS64::VisitMathNextAfter(HInvoke* invoke) {
+ CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathNextAfter(HInvoke* invoke) {
+ GenFPFPToFPCall(invoke, codegen_, kQuickNextAfter);
+}
+
+// static double java.lang.Math.sinh(double x)
+void IntrinsicLocationsBuilderMIPS64::VisitMathSinh(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathSinh(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickSinh);
+}
+
+// static double java.lang.Math.tan(double a)
+void IntrinsicLocationsBuilderMIPS64::VisitMathTan(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathTan(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickTan);
+}
+
+// static double java.lang.Math.tanh(double x)
+void IntrinsicLocationsBuilderMIPS64::VisitMathTanh(HInvoke* invoke) {
+ CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathTanh(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickTanh);
+}
+
UNIMPLEMENTED_INTRINSIC(MIPS64, ReferenceGetReferent)
-UNIMPLEMENTED_INTRINSIC(MIPS64, SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(MIPS64, SystemArrayCopy)
-UNIMPLEMENTED_INTRINSIC(MIPS64, MathCos)
-UNIMPLEMENTED_INTRINSIC(MIPS64, MathSin)
-UNIMPLEMENTED_INTRINSIC(MIPS64, MathAcos)
-UNIMPLEMENTED_INTRINSIC(MIPS64, MathAsin)
-UNIMPLEMENTED_INTRINSIC(MIPS64, MathAtan)
-UNIMPLEMENTED_INTRINSIC(MIPS64, MathAtan2)
-UNIMPLEMENTED_INTRINSIC(MIPS64, MathCbrt)
-UNIMPLEMENTED_INTRINSIC(MIPS64, MathCosh)
-UNIMPLEMENTED_INTRINSIC(MIPS64, MathExp)
-UNIMPLEMENTED_INTRINSIC(MIPS64, MathExpm1)
-UNIMPLEMENTED_INTRINSIC(MIPS64, MathHypot)
-UNIMPLEMENTED_INTRINSIC(MIPS64, MathLog)
-UNIMPLEMENTED_INTRINSIC(MIPS64, MathLog10)
-UNIMPLEMENTED_INTRINSIC(MIPS64, MathNextAfter)
-UNIMPLEMENTED_INTRINSIC(MIPS64, MathSinh)
-UNIMPLEMENTED_INTRINSIC(MIPS64, MathTan)
-UNIMPLEMENTED_INTRINSIC(MIPS64, MathTanh)
-
UNIMPLEMENTED_INTRINSIC(MIPS64, StringStringIndexOf);
UNIMPLEMENTED_INTRINSIC(MIPS64, StringStringIndexOfAfter);
UNIMPLEMENTED_INTRINSIC(MIPS64, StringBufferAppend);
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 4710b32e9c..5a95abdb50 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -63,12 +63,122 @@ static bool IsEarlyExit(HLoopInformation* loop_info) {
return false;
}
+// Detect a sign extension from the given type. Sets the promoted operand on success.
+static bool IsSignExtensionAndGet(HInstruction* instruction,
+ Primitive::Type type,
+ /*out*/ HInstruction** operand) {
+ // Accept any already wider constant that would be handled properly by sign
+ // extension when represented in the *width* of the given narrower data type
+ // (the fact that char normally zero extends does not matter here).
+ int64_t value = 0;
+ if (IsInt64AndGet(instruction, &value)) {
+ switch (type) {
+ case Primitive::kPrimByte:
+ if (std::numeric_limits<int8_t>::min() <= value &&
+ std::numeric_limits<int8_t>::max() >= value) {
+ *operand = instruction;
+ return true;
+ }
+ return false;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ if (std::numeric_limits<int16_t>::min() <= value &&
+ std::numeric_limits<int16_t>::max() >= value) {
+ *operand = instruction;
+ return true;
+ }
+ return false;
+ default:
+ return false;
+ }
+ }
+ // An implicit widening conversion of a signed integer to an integral type sign-extends
+ // the two's-complement representation of the integer value to fill the wider format.
+ if (instruction->GetType() == type && (instruction->IsArrayGet() ||
+ instruction->IsStaticFieldGet() ||
+ instruction->IsInstanceFieldGet())) {
+ switch (type) {
+ case Primitive::kPrimByte:
+ case Primitive::kPrimShort:
+ *operand = instruction;
+ return true;
+ default:
+ return false;
+ }
+ }
+ // TODO: perhaps explicit conversions later too?
+ // (this may set an operand different from the instruction itself)
+ return false;
+}
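The constant case accepts exactly those values that survive a round trip through the narrow signed type. A sketch of the rule, with a couple of boundary cases (the helper is hypothetical):

#include <cstdint>

// Sketch of the constant test above: a wide constant acts as a sign extension
// from an n-bit type iff it already fits the signed n-bit range.
bool FitsSigned(int64_t value, int bits) {
  const int64_t lo = -(INT64_C(1) << (bits - 1));
  const int64_t hi = (INT64_C(1) << (bits - 1)) - 1;
  return lo <= value && value <= hi;
}
// FitsSigned(255, 8) is false: 0xFF sign-extends from byte to -1, not 255.
// FitsSigned(255, 16) is true: short/char arithmetic sees 255 unchanged.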
+
+// Detect a zero extension from the given type. Sets the promoted operand on success.
+static bool IsZeroExtensionAndGet(HInstruction* instruction,
+ Primitive::Type type,
+ /*out*/ HInstruction** operand) {
+ // Accept any already wider constant that would be handled properly by zero
+ // extension when represented in the *width* of the given narrower data type
+ // (the fact that byte/short normally sign extend does not matter here).
+ int64_t value = 0;
+ if (IsInt64AndGet(instruction, &value)) {
+ switch (type) {
+ case Primitive::kPrimByte:
+ if (std::numeric_limits<uint8_t>::min() <= value &&
+ std::numeric_limits<uint8_t>::max() >= value) {
+ *operand = instruction;
+ return true;
+ }
+ return false;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ if (std::numeric_limits<uint16_t>::min() <= value &&
+ std::numeric_limits<uint16_t>::max() >= value) {
+ *operand = instruction;
+ return true;
+ }
+ return false;
+ default:
+ return false;
+ }
+ }
+ // An implicit widening conversion of a char to an integral type zero-extends
+ // the representation of the char value to fill the wider format.
+ if (instruction->GetType() == type && (instruction->IsArrayGet() ||
+ instruction->IsStaticFieldGet() ||
+ instruction->IsInstanceFieldGet())) {
+ if (type == Primitive::kPrimChar) {
+ *operand = instruction;
+ return true;
+ }
+ }
+ // A sign (or zero) extension followed by an explicit removal of just the
+ // higher sign bits is equivalent to a zero extension of the underlying operand.
+ if (instruction->IsAnd()) {
+ int64_t mask = 0;
+ HInstruction* a = instruction->InputAt(0);
+ HInstruction* b = instruction->InputAt(1);
+ // In (a & b) find (mask & b) or (a & mask) with sign or zero extension on the non-mask.
+ if ((IsInt64AndGet(a, /*out*/ &mask) && (IsSignExtensionAndGet(b, type, /*out*/ operand) ||
+ IsZeroExtensionAndGet(b, type, /*out*/ operand))) ||
+ (IsInt64AndGet(b, /*out*/ &mask) && (IsSignExtensionAndGet(a, type, /*out*/ operand) ||
+ IsZeroExtensionAndGet(a, type, /*out*/ operand)))) {
+ switch ((*operand)->GetType()) {
+ case Primitive::kPrimByte: return mask == std::numeric_limits<uint8_t>::max();
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort: return mask == std::numeric_limits<uint16_t>::max();
+ default: return false;
+ }
+ }
+ }
+ // TODO: perhaps explicit conversions later too?
+ return false;
+}
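The And clause lets the detector look through explicit masking: an expression such as `b & 0xFF`, where `b` is a sign- or zero-extended byte, is itself a zero extension of the underlying operand. A scalar demonstration of the equivalence (sketch; the helper is hypothetical):

#include <cstdint>

// Masking off the sign bits of a widened byte yields its zero extension,
// regardless of whether the widening sign- or zero-extended.
int32_t ZeroExtendViaMask(int8_t loaded_byte) {
  int32_t widened = loaded_byte;  // implicit widening sign-extends
  return widened & 0xFF;          // equals static_cast<uint8_t>(loaded_byte)
}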
+
// Test vector restrictions.
static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) {
return (restrictions & tested) != 0;
}
-// Inserts an instruction.
+// Insert an instruction.
static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) {
DCHECK(block != nullptr);
DCHECK(instruction != nullptr);
@@ -713,6 +823,10 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node,
return true;
}
} else if (instruction->IsShl() || instruction->IsShr() || instruction->IsUShr()) {
+ // Recognize vectorization idioms.
+ if (VectorizeHalvingAddIdiom(node, instruction, generate_code, type, restrictions)) {
+ return true;
+ }
// Deal with vector restrictions.
if ((HasVectorRestrictions(restrictions, kNoShift)) ||
(instruction->IsShr() && HasVectorRestrictions(restrictions, kNoShr))) {
@@ -806,11 +920,11 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |= kNoMul | kNoDiv | kNoShift | kNoAbs;
+ *restrictions |= kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd;
return TrySetVectorLength(16);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoAbs;
+ *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd;
return TrySetVectorLength(8);
case Primitive::kPrimInt:
*restrictions |= kNoDiv;
@@ -1039,6 +1153,90 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org,
#undef GENERATE_VEC
//
+// Vectorization idioms.
+//
+
+// Method recognizes the following idioms:
+// rounding halving add (a + b + 1) >> 1 for unsigned/signed operands a, b
+// regular halving add (a + b) >> 1 for unsigned/signed operands a, b
+// Provided that the operands are promoted to a wider form to do the arithmetic and
+// then cast back to narrower form, the idioms can be mapped into efficient SIMD
+// implementation that operates directly in narrower form (plus one extra bit).
+// TODO: current version recognizes implicit byte/short/char widening only;
+// explicit widening from int to long could be added later.
+bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node,
+ HInstruction* instruction,
+ bool generate_code,
+ Primitive::Type type,
+ uint64_t restrictions) {
+ // Test for top level arithmetic shift right x >> 1 or logical shift right x >>> 1
+ // (note that whether the sign bit in higher precision is shifted in has no effect
+ // on the narrow precision computed by the idiom).
+ int64_t value = 0;
+ if ((instruction->IsShr() ||
+ instruction->IsUShr()) &&
+ IsInt64AndGet(instruction->InputAt(1), &value) && value == 1) {
+ //
+ // TODO: make following code less sensitive to associativity and commutativity differences.
+ //
+ HInstruction* x = instruction->InputAt(0);
+ // Test for an optional rounding part (x + 1) >> 1.
+ bool is_rounded = false;
+ if (x->IsAdd() && IsInt64AndGet(x->InputAt(1), &value) && value == 1) {
+ x = x->InputAt(0);
+ is_rounded = true;
+ }
+ // Test for a core addition (a + b) >> 1 (possibly rounded), either unsigned or signed.
+ if (x->IsAdd()) {
+ HInstruction* a = x->InputAt(0);
+ HInstruction* b = x->InputAt(1);
+ HInstruction* r = nullptr;
+ HInstruction* s = nullptr;
+ bool is_unsigned = false;
+ if (IsZeroExtensionAndGet(a, type, &r) && IsZeroExtensionAndGet(b, type, &s)) {
+ is_unsigned = true;
+ } else if (IsSignExtensionAndGet(a, type, &r) && IsSignExtensionAndGet(b, type, &s)) {
+ is_unsigned = false;
+ } else {
+ return false;
+ }
+ // Deal with vector restrictions.
+ if ((!is_unsigned && HasVectorRestrictions(restrictions, kNoSignedHAdd)) ||
+ (!is_rounded && HasVectorRestrictions(restrictions, kNoUnroundedHAdd))) {
+ return false;
+ }
+ // Accept recognized halving add for vectorizable operands. Vectorized code uses the
+ // shorthand idiomatic operation. Sequential code uses the original scalar expressions.
+ DCHECK(r != nullptr && s != nullptr);
+ if (VectorizeUse(node, r, generate_code, type, restrictions) &&
+ VectorizeUse(node, s, generate_code, type, restrictions)) {
+ if (generate_code) {
+ if (vector_mode_ == kVector) {
+ vector_map_->Put(instruction, new (global_allocator_) HVecHalvingAdd(
+ global_allocator_,
+ vector_map_->Get(r),
+ vector_map_->Get(s),
+ type,
+ vector_length_,
+ is_unsigned,
+ is_rounded));
+ } else {
+ VectorizeUse(node, instruction->InputAt(0), generate_code, type, restrictions);
+ VectorizeUse(node, instruction->InputAt(1), generate_code, type, restrictions);
+ GenerateVecOp(instruction,
+ vector_map_->Get(instruction->InputAt(0)),
+ vector_map_->Get(instruction->InputAt(1)),
+ type);
+ }
+ }
+ return true;
+ }
+ }
+ }
+ return false;
+}
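For reference, the scalar shape the recognizer matches, written out for the rounded unsigned byte case (sketch; on ARM64, for example, the vector form becomes a single URHADD per lane group):

#include <cstdint>

// Scalar form of the rounded unsigned halving-add idiom: operands widen to
// int, the sum picks up the +1 rounding term, and the shift halves it before
// the result is narrowed back to byte.
void RoundedHalvingAdd(const uint8_t* a, const uint8_t* b, uint8_t* out, int n) {
  for (int i = 0; i < n; ++i) {
    out[i] = static_cast<uint8_t>((a[i] + b[i] + 1) >> 1);
  }
}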
+
+//
// Helpers.
//
@@ -1082,7 +1280,10 @@ bool HLoopOptimization::TrySetSimpleLoopHeader(HBasicBlock* block) {
HInstruction* s = block->GetFirstInstruction();
if (s != nullptr && s->IsSuspendCheck()) {
HInstruction* c = s->GetNext();
- if (c != nullptr && c->IsCondition() && c->GetUses().HasExactlyOneElement()) {
+ if (c != nullptr &&
+ c->IsCondition() &&
+ c->GetUses().HasExactlyOneElement() && // only used for termination
+ !c->HasEnvironmentUses()) { // unlikely, but not impossible
HInstruction* i = c->GetNext();
if (i != nullptr && i->IsIf() && i->InputAt(0) == c) {
iset_->insert(c);
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index d8f50aab28..4a7da86e32 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -62,13 +62,15 @@ class HLoopOptimization : public HOptimization {
* Vectorization restrictions (bit mask).
*/
enum VectorRestrictions {
- kNone = 0, // no restrictions
- kNoMul = 1, // no multiplication
- kNoDiv = 2, // no division
- kNoShift = 4, // no shift
- kNoShr = 8, // no arithmetic shift right
- kNoHiBits = 16, // "wider" operations cannot bring in higher order bits
- kNoAbs = 32, // no absolute value
+ kNone = 0, // no restrictions
+ kNoMul = 1, // no multiplication
+ kNoDiv = 2, // no division
+ kNoShift = 4, // no shift
+ kNoShr = 8, // no arithmetic shift right
+ kNoHiBits = 16, // "wider" operations cannot bring in higher order bits
+ kNoSignedHAdd = 32, // no signed halving add
+ kNoUnroundedHAdd = 64, // no unrounded halving add
+ kNoAbs = 128, // no absolute value
};
/*
@@ -136,6 +138,13 @@ class HLoopOptimization : public HOptimization {
Primitive::Type type);
void GenerateVecOp(HInstruction* org, HInstruction* opa, HInstruction* opb, Primitive::Type type);
+ // Vectorization idioms.
+ bool VectorizeHalvingAddIdiom(LoopNode* node,
+ HInstruction* instruction,
+ bool generate_code,
+ Primitive::Type type,
+ uint64_t restrictions);
+
// Helpers.
bool TrySetPhiInduction(HPhi* phi, bool restrict_uses);
bool TrySetSimpleLoopHeader(HBasicBlock* block);
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index c109369106..6be237e612 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1369,9 +1369,12 @@ class HLoopInformationOutwardIterator : public ValueObject {
M(VecAbs, VecUnaryOperation) \
M(VecNot, VecUnaryOperation) \
M(VecAdd, VecBinaryOperation) \
+ M(VecHalvingAdd, VecBinaryOperation) \
M(VecSub, VecBinaryOperation) \
M(VecMul, VecBinaryOperation) \
M(VecDiv, VecBinaryOperation) \
+ M(VecMin, VecBinaryOperation) \
+ M(VecMax, VecBinaryOperation) \
M(VecAnd, VecBinaryOperation) \
M(VecAndNot, VecBinaryOperation) \
M(VecOr, VecBinaryOperation) \
@@ -6845,6 +6848,7 @@ class HBlocksInLoopReversePostOrderIterator : public ValueObject {
DISALLOW_COPY_AND_ASSIGN(HBlocksInLoopReversePostOrderIterator);
};
+// Returns int64_t value of a properly typed constant.
inline int64_t Int64FromConstant(HConstant* constant) {
if (constant->IsIntConstant()) {
return constant->AsIntConstant()->GetValue();
@@ -6856,6 +6860,21 @@ inline int64_t Int64FromConstant(HConstant* constant) {
}
}
+// Returns true iff instruction is an integral constant (and sets value on success).
+inline bool IsInt64AndGet(HInstruction* instruction, /*out*/ int64_t* value) {
+ if (instruction->IsIntConstant()) {
+ *value = instruction->AsIntConstant()->GetValue();
+ return true;
+ } else if (instruction->IsLongConstant()) {
+ *value = instruction->AsLongConstant()->GetValue();
+ return true;
+ } else if (instruction->IsNullConstant()) {
+ *value = 0;
+ return true;
+ }
+ return false;
+}
+
#define INSTRUCTION_TYPE_CHECK(type, super) \
inline bool HInstruction::Is##type() const { return GetKind() == k##type; } \
inline const H##type* HInstruction::As##type() const { \
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index 0cbbf2a215..bff58d0910 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -338,6 +338,42 @@ class HVecAdd FINAL : public HVecBinaryOperation {
DISALLOW_COPY_AND_ASSIGN(HVecAdd);
};
+// Performs halving add on every component in the two vectors, viz.
+// rounded [ x1, .. , xn ] hradd [ y1, .. , yn ] = [ (x1 + y1 + 1) >> 1, .. , (xn + yn + 1) >> 1 ]
+// or [ x1, .. , xn ] hadd [ y1, .. , yn ] = [ (x1 + y1) >> 1, .. , (xn + yn ) >> 1 ]
+// for signed operands x, y (sign extension) or unsigned operands x, y (zero extension).
+class HVecHalvingAdd FINAL : public HVecBinaryOperation {
+ public:
+ HVecHalvingAdd(ArenaAllocator* arena,
+ HInstruction* left,
+ HInstruction* right,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ bool is_unsigned,
+ bool is_rounded,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc),
+ is_unsigned_(is_unsigned),
+ is_rounded_(is_rounded) {
+ DCHECK(left->IsVecOperation() && right->IsVecOperation());
+ DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
+ DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type);
+ SetRawInputAt(0, left);
+ SetRawInputAt(1, right);
+ }
+
+ bool IsUnsigned() const { return is_unsigned_; }
+ bool IsRounded() const { return is_rounded_; }
+
+ DECLARE_INSTRUCTION(VecHalvingAdd);
+
+ private:
+ bool is_unsigned_;
+ bool is_rounded_;
+
+ DISALLOW_COPY_AND_ASSIGN(HVecHalvingAdd);
+};
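Per-lane semantics of the node, covering all four flavor combinations, as a scalar sketch for the short-packed case; the one-bit-wider intermediate is what the "plus one extra bit" in the class comment refers to (the helper is hypothetical):

#include <cstdint>

// One lane of HVecHalvingAdd for a short-packed vector: the 32-bit
// intermediate sum cannot overflow, matching the instruction's extra bit.
int16_t HalvingAddLane(int16_t x, int16_t y, bool is_unsigned, bool is_rounded) {
  int32_t xw = is_unsigned ? static_cast<uint16_t>(x) : x;  // zero vs sign extend
  int32_t yw = is_unsigned ? static_cast<uint16_t>(y) : y;
  return static_cast<int16_t>((xw + yw + (is_rounded ? 1 : 0)) >> 1);
}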
+
// Subtracts every component in the two vectors,
// viz. [ x1, .. , xn ] - [ y1, .. , yn ] = [ x1 - y1, .. , xn - yn ].
class HVecSub FINAL : public HVecBinaryOperation {
@@ -404,6 +440,50 @@ class HVecDiv FINAL : public HVecBinaryOperation {
DISALLOW_COPY_AND_ASSIGN(HVecDiv);
};
+// Takes minimum of every component in the two vectors,
+// viz. MIN( [ x1, .. , xn ] , [ y1, .. , yn ]) = [ min(x1, y1), .. , min(xn, yn) ].
+class HVecMin FINAL : public HVecBinaryOperation {
+ public:
+ HVecMin(ArenaAllocator* arena,
+ HInstruction* left,
+ HInstruction* right,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(left->IsVecOperation() && right->IsVecOperation());
+ DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
+ DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type);
+ SetRawInputAt(0, left);
+ SetRawInputAt(1, right);
+ }
+
+ DECLARE_INSTRUCTION(VecMin);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecMin);
+};
+
+// Takes maximum of every component in the two vectors,
+// viz. MAX( [ x1, .. , xn ] , [ y1, .. , yn ]) = [ max(x1, y1), .. , max(xn, yn) ].
+class HVecMax FINAL : public HVecBinaryOperation {
+ public:
+ HVecMax(ArenaAllocator* arena,
+ HInstruction* left,
+ HInstruction* right,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(left->IsVecOperation() && right->IsVecOperation());
+ DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
+ DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type);
+ SetRawInputAt(0, left);
+ SetRawInputAt(1, right);
+ }
+
+ DECLARE_INSTRUCTION(VecMax);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecMax);
+};
+
// Bitwise-ands every component in the two vectors,
// viz. [ x1, .. , xn ] & [ y1, .. , yn ] = [ x1 & y1, .. , xn & yn ].
class HVecAnd FINAL : public HVecBinaryOperation {