Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/code_generator_vector_arm.cc      |  8
-rw-r--r--  compiler/optimizing/code_generator_vector_arm64.cc    | 31
-rw-r--r--  compiler/optimizing/code_generator_vector_arm_vixl.cc | 8
-rw-r--r--  compiler/optimizing/code_generator_vector_mips.cc     |  8
-rw-r--r--  compiler/optimizing/code_generator_vector_mips64.cc   | 8
-rw-r--r--  compiler/optimizing/code_generator_vector_x86.cc      | 40
-rw-r--r--  compiler/optimizing/code_generator_vector_x86_64.cc   | 40
-rw-r--r--  compiler/optimizing/loop_optimization.cc              | 86
-rw-r--r--  compiler/optimizing/loop_optimization.h               |  1
-rw-r--r--  compiler/optimizing/nodes.cc                          |  4
-rw-r--r--  compiler/optimizing/nodes.h                           |  5
-rw-r--r--  compiler/optimizing/nodes_vector.h                    | 19
12 files changed, 241 insertions(+), 17 deletions(-)
diff --git a/compiler/optimizing/code_generator_vector_arm.cc b/compiler/optimizing/code_generator_vector_arm.cc
index ba2b2cb2c9..e7f7b3019c 100644
--- a/compiler/optimizing/code_generator_vector_arm.cc
+++ b/compiler/optimizing/code_generator_vector_arm.cc
@@ -81,6 +81,14 @@ void InstructionCodeGeneratorARM::VisitVecNeg(HVecNeg* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
+void LocationsBuilderARM::VisitVecAbs(HVecAbs* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecAbs(HVecAbs* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderARM::VisitVecNot(HVecNot* instruction) {
CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
}
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index 96d00210b8..f4874fe2bc 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -169,6 +169,37 @@ void InstructionCodeGeneratorARM64::VisitVecNeg(HVecNeg* instruction) {
}
}
+void LocationsBuilderARM64::VisitVecAbs(HVecAbs* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecAbs(HVecAbs* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ FPRegister src = DRegisterFrom(locations->InAt(0));
+ FPRegister dst = DRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Abs(dst.V8B(), src.V8B());
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Abs(dst.V4H(), src.V4H());
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Abs(dst.V2S(), src.V2S());
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Fabs(dst.V2S(), src.V2S());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ }
+}
+
void LocationsBuilderARM64::VisitVecNot(HVecNot* instruction) {
CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
}
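
For reference, a minimal scalar sketch (illustration only, not ART code) of the per-lane semantics the NEON Abs/Fabs instructions above implement. AArch64 SIMD has dedicated absolute-value instructions, so no bit tricks are needed here; the x86 code generators further down have to synthesize the same behavior.

    #include <cmath>
    #include <cstdint>

    // Integer lane: two's-complement abs. The INT32_MIN lane wraps back to
    // INT32_MIN under NEON Abs; unsigned arithmetic avoids signed overflow.
    int32_t AbsLaneS32(int32_t x) {
      uint32_t ux = static_cast<uint32_t>(x);
      return static_cast<int32_t>(x < 0 ? 0u - ux : ux);
    }

    // Float lane: Fabs clears the sign bit (-0.0f becomes +0.0f, NaN payloads
    // are preserved); std::fabs matches that behavior.
    float AbsLaneF32(float x) {
      return std::fabs(x);
    }
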
diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc
index 171198902d..74fa584e09 100644
--- a/compiler/optimizing/code_generator_vector_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc
@@ -81,6 +81,14 @@ void InstructionCodeGeneratorARMVIXL::VisitVecNeg(HVecNeg* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
+void LocationsBuilderARMVIXL::VisitVecAbs(HVecAbs* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecAbs(HVecAbs* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderARMVIXL::VisitVecNot(HVecNot* instruction) {
CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
}
diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc
index 6f5fe0d2a4..6969abd422 100644
--- a/compiler/optimizing/code_generator_vector_mips.cc
+++ b/compiler/optimizing/code_generator_vector_mips.cc
@@ -81,6 +81,14 @@ void InstructionCodeGeneratorMIPS::VisitVecNeg(HVecNeg* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
+void LocationsBuilderMIPS::VisitVecAbs(HVecAbs* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecAbs(HVecAbs* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderMIPS::VisitVecNot(HVecNot* instruction) {
CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
}
diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc
index 2ee7ac91cf..87118cefa5 100644
--- a/compiler/optimizing/code_generator_vector_mips64.cc
+++ b/compiler/optimizing/code_generator_vector_mips64.cc
@@ -81,6 +81,14 @@ void InstructionCodeGeneratorMIPS64::VisitVecNeg(HVecNeg* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
+void LocationsBuilderMIPS64::VisitVecAbs(HVecAbs* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecAbs(HVecAbs* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderMIPS64::VisitVecNot(HVecNot* instruction) {
CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
}
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index 4f3988ee2e..8dabb4d08f 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -199,6 +199,46 @@ void InstructionCodeGeneratorX86::VisitVecNeg(HVecNeg* instruction) {
}
}
+void LocationsBuilderX86::VisitVecAbs(HVecAbs* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+ if (instruction->GetPackedType() == Primitive::kPrimInt) {
+ instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
+ }
+}
+
+void InstructionCodeGeneratorX86::VisitVecAbs(HVecAbs* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimInt: {
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ __ movaps(dst, src);
+ __ pxor(tmp, tmp);
+ __ pcmpgtd(tmp, dst);
+ __ pxor(dst, tmp);
+ __ psubd(dst, tmp);
+ break;
+ }
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ pcmpeqb(dst, dst); // all ones
+ __ psrld(dst, Immediate(1));
+ __ andps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ pcmpeqb(dst, dst); // all ones
+ __ psrlq(dst, Immediate(1));
+ __ andpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
void LocationsBuilderX86::VisitVecNot(HVecNot* instruction) {
CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
// Boolean-not requires a temporary to construct the 16 x one.
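
The x86 paths above rely on two classic branch-free idioms. A hedged scalar model (illustration only, not ART code) of what each SSE sequence computes per lane:

    #include <cstdint>
    #include <cstring>

    // Integer path (pxor/pcmpgtd/pxor/psubd): build mask = (0 > x) ? ~0 : 0,
    // then |x| = (x ^ mask) - mask, all in two's-complement arithmetic.
    int32_t AbsViaCompareMask(int32_t x) {
      uint32_t ux = static_cast<uint32_t>(x);
      uint32_t mask = x < 0 ? 0xFFFFFFFFu : 0u;        // pcmpgtd against zero
      return static_cast<int32_t>((ux ^ mask) - mask); // pxor, then psubd
    }

    // Float path (pcmpeqb/psrld/andps): all ones shifted right by one is
    // 0x7FFFFFFF, i.e. everything but the sign bit; and-ing clears the sign.
    float AbsViaSignMask(float x) {
      uint32_t bits;
      std::memcpy(&bits, &x, sizeof(bits));
      bits &= 0x7FFFFFFFu;                             // andps with ~sign mask
      std::memcpy(&x, &bits, sizeof(x));
      return x;
    }
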
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index b1c1494f6b..e95608839b 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -192,6 +192,46 @@ void InstructionCodeGeneratorX86_64::VisitVecNeg(HVecNeg* instruction) {
}
}
+void LocationsBuilderX86_64::VisitVecAbs(HVecAbs* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+ if (instruction->GetPackedType() == Primitive::kPrimInt) {
+ instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
+ }
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecAbs(HVecAbs* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimInt: {
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ __ movaps(dst, src);
+ __ pxor(tmp, tmp);
+ __ pcmpgtd(tmp, dst);
+ __ pxor(dst, tmp);
+ __ psubd(dst, tmp);
+ break;
+ }
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ pcmpeqb(dst, dst); // all ones
+ __ psrld(dst, Immediate(1));
+ __ andps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ pcmpeqb(dst, dst); // all ones
+ __ psrlq(dst, Immediate(1));
+ __ andpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
void LocationsBuilderX86_64::VisitVecNot(HVecNot* instruction) {
CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
// Boolean-not requires a temporary to construct the 16 x one.
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 42ed04dfa3..ca31bf89e6 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -533,29 +533,25 @@ void HLoopOptimization::GenerateNewLoop(LoopNode* node,
kNoRegNumber,
0,
HPhi::ToPhiType(induc_type));
- // Generate header.
+ // Generate header and prepare body.
// for (i = lo; i < hi; i += step)
// <loop-body>
HInstruction* cond = new (global_allocator_) HAboveOrEqual(vector_phi_, hi);
vector_header_->AddPhi(vector_phi_);
vector_header_->AddInstruction(cond);
vector_header_->AddInstruction(new (global_allocator_) HIf(cond));
- // Suspend check and environment.
- HInstruction* suspend = vector_header_->GetFirstInstruction();
- suspend->CopyEnvironmentFromWithLoopPhiAdjustment(
- node->loop_info->GetSuspendCheck()->GetEnvironment(), vector_header_);
- // Generate body.
for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
bool vectorized_def = VectorizeDef(node, it.Current(), /*generate_code*/ true);
DCHECK(vectorized_def);
}
+ // Generate body.
+ HEnvironment* env = vector_header_->GetFirstInstruction()->GetEnvironment();
for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
auto i = vector_map_->find(it.Current());
if (i != vector_map_->end() && !i->second->IsInBlock()) {
Insert(vector_body_, i->second); // lays out in original order
if (i->second->NeedsEnvironment()) {
- i->second->CopyEnvironmentFromWithLoopPhiAdjustment(
- suspend->GetEnvironment(), vector_header_);
+ i->second->CopyEnvironmentFromWithLoopPhiAdjustment(env, vector_header_);
}
}
}
@@ -735,8 +731,32 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node,
return true;
}
} else if (instruction->IsInvokeStaticOrDirect()) {
- // TODO: coming soon.
- return false;
+ // Accept particular intrinsics.
+ HInvokeStaticOrDirect* invoke = instruction->AsInvokeStaticOrDirect();
+ switch (invoke->GetIntrinsic()) {
+ case Intrinsics::kMathAbsInt:
+ case Intrinsics::kMathAbsLong:
+ case Intrinsics::kMathAbsFloat:
+ case Intrinsics::kMathAbsDouble: {
+ // Deal with vector restrictions.
+ if (HasVectorRestrictions(restrictions, kNoAbs) ||
+ HasVectorRestrictions(restrictions, kNoHiBits)) {
+ // TODO: we can do better for some hibits cases.
+ return false;
+ }
+ // Accept ABS(x) for vectorizable operand.
+ HInstruction* opa = instruction->InputAt(0);
+ if (VectorizeUse(node, opa, generate_code, type, restrictions)) {
+ if (generate_code) {
+ GenerateVecOp(instruction, vector_map_->Get(opa), nullptr, type);
+ }
+ return true;
+ }
+ return false;
+ }
+ default:
+ return false;
+ } // switch
}
return false;
}
@@ -754,11 +774,11 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restrictions) {
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |= kNoDiv;
+ *restrictions |= kNoDiv | kNoAbs;
return TrySetVectorLength(8);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv;
+ *restrictions |= kNoDiv | kNoAbs;
return TrySetVectorLength(4);
case Primitive::kPrimInt:
*restrictions |= kNoDiv;
@@ -775,17 +795,17 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |= kNoMul | kNoDiv | kNoShift;
+ *restrictions |= kNoMul | kNoDiv | kNoShift | kNoAbs;
return TrySetVectorLength(16);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv;
+ *restrictions |= kNoDiv | kNoAbs;
return TrySetVectorLength(8);
case Primitive::kPrimInt:
*restrictions |= kNoDiv;
return TrySetVectorLength(4);
case Primitive::kPrimLong:
- *restrictions |= kNoMul | kNoDiv | kNoShr;
+ *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs;
return TrySetVectorLength(2);
case Primitive::kPrimFloat:
return TrySetVectorLength(4);
@@ -956,7 +976,41 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org,
new (global_allocator_) HVecUShr(global_allocator_, opa, opb, type, vector_length_),
new (global_allocator_) HUShr(type, opa, opb));
case HInstruction::kInvokeStaticOrDirect: {
- // TODO: coming soon.
+ HInvokeStaticOrDirect* invoke = org->AsInvokeStaticOrDirect();
+ if (vector_mode_ == kVector) {
+ switch (invoke->GetIntrinsic()) {
+ case Intrinsics::kMathAbsInt:
+ case Intrinsics::kMathAbsLong:
+ case Intrinsics::kMathAbsFloat:
+ case Intrinsics::kMathAbsDouble:
+ DCHECK(opb == nullptr);
+ vector = new (global_allocator_) HVecAbs(global_allocator_, opa, type, vector_length_);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD intrinsic";
+ UNREACHABLE();
+ } // switch invoke
+ } else {
+ // In scalar code, simply clone the method invoke, and replace its operands
+ // with the corresponding new scalar instructions in the loop.
+ DCHECK(vector_mode_ == kSequential);
+ HInvokeStaticOrDirect* new_invoke = new (global_allocator_) HInvokeStaticOrDirect(
+ global_allocator_,
+ invoke->GetNumberOfArguments(),
+ invoke->GetType(),
+ invoke->GetDexPc(),
+ invoke->GetDexMethodIndex(),
+ invoke->GetResolvedMethod(),
+ invoke->GetDispatchInfo(),
+ invoke->GetInvokeType(),
+ invoke->GetTargetMethod(),
+ invoke->GetClinitCheckRequirement());
+ HInputsRef inputs = invoke->GetInputs();
+ for (size_t index = 0; index < inputs.size(); ++index) {
+ new_invoke->SetArgumentAt(index, vector_map_->Get(inputs[index]));
+ }
+ vector = new_invoke;
+ }
break;
}
default:
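
For context, a hedged C++ rendering (names invented for illustration; the real input is the dex form of the Java code) of the loop shape whose Math.abs call VectorizeUse now accepts and GenerateVecOp turns into HVecAbs:

    #include <cstdint>
    #include <cstdlib>

    // Illustration only: each std::abs call below corresponds to the
    // Intrinsics::kMathAbsInt case and becomes one HVecAbs per vector step.
    void AbsArray(int32_t* out, const int32_t* in, int n) {
      for (int i = 0; i < n; ++i) {
        out[i] = std::abs(in[i]);
      }
    }
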
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 16f7691af2..d8f50aab28 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -68,6 +68,7 @@ class HLoopOptimization : public HOptimization {
kNoShift = 4, // no shift
kNoShr = 8, // no arithmetic shift right
kNoHiBits = 16, // "wider" operations cannot bring in higher order bits
+ kNoAbs = 32, // no absolute value
};
/*
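
The restriction set is a plain bitmask over this enum, so kNoAbs composes with the existing flags via operator|. A minimal sketch of the test used by VectorizeUse, assuming the helper has this shape:

    // Hedged sketch (the actual helper may differ in detail): a use is
    // rejected when any tested restriction bit is set for the current type.
    static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) {
      return (restrictions & tested) != 0;
    }
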
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 5617e4bfcb..e71fea92a9 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -2380,11 +2380,13 @@ HBasicBlock* HGraph::TransformLoopForVectorization(HBasicBlock* header,
MakeRoomFor(&reverse_post_order_, 1, index_of_body - 1);
reverse_post_order_[index_of_body] = new_body;
- // Add gotos and suspend check (client must add conditional in header and copy environment).
+ // Add gotos and suspend check (client must add conditional in header).
new_pre_header->AddInstruction(new (arena_) HGoto());
HSuspendCheck* suspend_check = new (arena_) HSuspendCheck(header->GetDexPc());
new_header->AddInstruction(suspend_check);
new_body->AddInstruction(new (arena_) HGoto());
+ suspend_check->CopyEnvironmentFromWithLoopPhiAdjustment(
+ loop->GetSuspendCheck()->GetEnvironment(), header);
// Update loop information.
new_header->AddBackEdge(new_body);
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 52a02c2285..671f950aa6 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1374,6 +1374,7 @@ class HLoopInformationOutwardIterator : public ValueObject {
M(VecSumReduce, VecUnaryOperation) \
M(VecCnv, VecUnaryOperation) \
M(VecNeg, VecUnaryOperation) \
+ M(VecAbs, VecUnaryOperation) \
M(VecNot, VecUnaryOperation) \
M(VecAdd, VecBinaryOperation) \
M(VecSub, VecBinaryOperation) \
@@ -4224,6 +4225,10 @@ class HInvokeStaticOrDirect FINAL : public HInvoke {
dispatch_info_ = dispatch_info;
}
+ DispatchInfo GetDispatchInfo() const {
+ return dispatch_info_;
+ }
+
void AddSpecialInput(HInstruction* input) {
// We allow only one special input.
DCHECK(!IsStringInit() && !HasCurrentMethodInput());
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index 9f9b918f17..0cbbf2a215 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -278,6 +278,25 @@ class HVecNeg FINAL : public HVecUnaryOperation {
DISALLOW_COPY_AND_ASSIGN(HVecNeg);
};
+// Takes the absolute value of every component in the vector,
+// viz. abs[ x1, .. , xn ] = [ |x1|, .. , |xn| ].
+class HVecAbs FINAL : public HVecUnaryOperation {
+ public:
+ HVecAbs(ArenaAllocator* arena,
+ HInstruction* input,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(input->IsVecOperation());
+ DCHECK_EQ(input->AsVecOperation()->GetPackedType(), packed_type);
+ SetRawInputAt(0, input);
+ }
+ DECLARE_INSTRUCTION(VecAbs);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecAbs);
+};
+
// Bitwise- or boolean-nots every component in the vector,
// viz. not[ x1, .. , xn ] = [ ~x1, .. , ~xn ], or
// not[ x1, .. , xn ] = [ !x1, .. , !xn ] for boolean.
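
A hedged usage sketch mirroring the GenerateVecOp change above (global_allocator_, opa, and the packed type are the names used there): constructing the new node for four packed ints.

    // Illustration only: an HVecAbs over an existing vector operand, as built
    // for the Math.abs intrinsics in GenerateVecOp.
    HInstruction* vector = new (global_allocator_) HVecAbs(
        global_allocator_, opa, Primitive::kPrimInt, /* vector_length */ 4);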