path: root/compiler/optimizing
author Aart Bik <ajcbik@google.com> 2017-08-30 21:21:41 +0000
committer Aart Bik <ajcbik@google.com> 2017-08-30 21:21:41 +0000
commit a57b4ee7b15ce6abfb5fa88c8dc8a516fe40e0d9 (patch)
tree c7ed7e8cb7439a8e689e399e34559aa46a97cdbd /compiler/optimizing
parent 9879d0eac8fe2aae19ca6a4a2a83222d6383afc2 (diff)
Revert "Basic SIMD reduction support."
This reverts commit 9879d0eac8fe2aae19ca6a4a2a83222d6383afc2.

Getting these type check failures in some builds. Need time to look
at this better, so reverting for now :-(

dex2oatd F 08-30 21:14:29 210122 226218 code_generator.cc:115] Check failed: CheckType(instruction->GetType(), locations->InAt(0)) PrimDouble C

Change-Id: I1c1c87b6323e01442e8fbd94869ddc9e760ea1fc
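For context on the quoted failure: the code generator asserts that an instruction's HIR type agrees with the type implied by its assigned location. Below is a minimal standalone sketch of that kind of check; the enum, struct names, and main() harness are simplified stand-ins invented for illustration, not ART's real types. Only the shape of the check and the SIMD-as-double convention (visible in the nodes_vector.h hunk further down) come from the source.

// Hypothetical, simplified model of CheckType() in code_generator.cc.
#include <cstdio>

enum class PrimType { kPrimInt, kPrimDouble };

struct Location {
  PrimType type;  // type implied by the register the allocator picked
};

struct Instruction {
  PrimType hir_type;  // what HInstruction::GetType() reports
  Location input0;
};

// Mirrors the spirit of CheckType(instruction->GetType(), locations->InAt(0)):
// the HIR type and the location type must agree.
bool CheckType(PrimType hir_type, const Location& loc) {
  return hir_type == loc.type;
}

int main() {
  // Every HVecOperation reports kPrimDouble as its HIR type (the
  // SIMD-looks-like-FPU convention in nodes_vector.h), so a vector
  // instruction whose input location ended up typed as a scalar,
  // e.g. int, trips the check.
  Instruction vec_op{PrimType::kPrimDouble, Location{PrimType::kPrimInt}};
  if (!CheckType(vec_op.hir_type, vec_op.input0)) {
    std::printf("Check failed: CheckType(instruction->GetType(), "
                "locations->InAt(0)) PrimDouble\n");
  }
  return 0;
}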
Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/code_generator_vector_arm64.cc    | 172
-rw-r--r--  compiler/optimizing/code_generator_vector_arm_vixl.cc |  28
-rw-r--r--  compiler/optimizing/code_generator_vector_mips.cc     |  28
-rw-r--r--  compiler/optimizing/code_generator_vector_mips64.cc   |  28
-rw-r--r--  compiler/optimizing/code_generator_vector_x86.cc      | 275
-rw-r--r--  compiler/optimizing/code_generator_vector_x86_64.cc   | 249
-rw-r--r--  compiler/optimizing/loop_optimization.cc              | 216
-rw-r--r--  compiler/optimizing/loop_optimization.h               |  30
-rw-r--r--  compiler/optimizing/nodes.h                           |  14
-rw-r--r--  compiler/optimizing/nodes_vector.h                    |  97
-rw-r--r--  compiler/optimizing/nodes_vector_test.cc              |  28
-rw-r--r--  compiler/optimizing/scheduler_arm64.cc                |  16
-rw-r--r--  compiler/optimizing/scheduler_arm64.h                 |   5
13 files changed, 185 insertions, 1001 deletions
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index 18a55c8b09..9095ecdf16 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -27,13 +27,12 @@ namespace arm64 {
using helpers::ARM64EncodableConstantOrRegister;
using helpers::Arm64CanEncodeConstantAsImmediate;
using helpers::DRegisterFrom;
+using helpers::VRegisterFrom;
using helpers::HeapOperand;
using helpers::InputRegisterAt;
using helpers::Int64ConstantFrom;
-using helpers::OutputRegister;
-using helpers::VRegisterFrom;
-using helpers::WRegisterFrom;
using helpers::XRegisterFrom;
+using helpers::WRegisterFrom;
#define __ GetVIXLAssembler()->
@@ -128,51 +127,20 @@ void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar*
}
}
-void LocationsBuilderARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
- switch (instruction->GetPackedType()) {
- case Primitive::kPrimBoolean:
- case Primitive::kPrimByte:
- case Primitive::kPrimChar:
- case Primitive::kPrimShort:
- case Primitive::kPrimInt:
- case Primitive::kPrimLong:
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetOut(Location::RequiresRegister());
- break;
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetOut(Location::SameAsFirstInput());
- break;
- default:
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
- }
+void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
-void InstructionCodeGeneratorARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- VRegister src = VRegisterFrom(locations->InAt(0));
- switch (instruction->GetPackedType()) {
- case Primitive::kPrimInt:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ Umov(OutputRegister(instruction), src.V4S(), 0);
- break;
- case Primitive::kPrimLong:
- DCHECK_EQ(2u, instruction->GetVectorLength());
- __ Umov(OutputRegister(instruction), src.V2D(), 0);
- break;
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- DCHECK_LE(2u, instruction->GetVectorLength());
- DCHECK_LE(instruction->GetVectorLength(), 4u);
- DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required
- break;
- default:
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
- }
+void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM64::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARM64::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
// Helper to set up locations for vector unary operations.
@@ -201,46 +169,6 @@ static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* in
}
}
-void LocationsBuilderARM64::VisitVecReduce(HVecReduce* instruction) {
- CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
-}
-
-void InstructionCodeGeneratorARM64::VisitVecReduce(HVecReduce* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- VRegister src = VRegisterFrom(locations->InAt(0));
- VRegister dst = DRegisterFrom(locations->Out());
- switch (instruction->GetPackedType()) {
- case Primitive::kPrimInt:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- switch (instruction->GetKind()) {
- case HVecReduce::kSum:
- __ Addv(dst.S(), src.V4S());
- break;
- case HVecReduce::kMin:
- __ Sminv(dst.S(), src.V4S());
- break;
- case HVecReduce::kMax:
- __ Smaxv(dst.S(), src.V4S());
- break;
- }
- break;
- case Primitive::kPrimLong:
- DCHECK_EQ(2u, instruction->GetVectorLength());
- switch (instruction->GetKind()) {
- case HVecReduce::kSum:
- __ Addp(dst.D(), src.V2D());
- break;
- default:
- LOG(FATAL) << "Unsupported SIMD min/max";
- UNREACHABLE();
- }
- break;
- default:
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
- }
-}
-
void LocationsBuilderARM64::VisitVecCnv(HVecCnv* instruction) {
CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -335,7 +263,6 @@ void InstructionCodeGeneratorARM64::VisitVecAbs(HVecAbs* instruction) {
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
}
}
@@ -878,77 +805,6 @@ void InstructionCodeGeneratorARM64::VisitVecUShr(HVecUShr* instruction) {
}
}
-void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
-
- DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
-
- HInstruction* input = instruction->InputAt(0);
- bool is_zero = IsZeroBitPattern(input);
-
- switch (instruction->GetPackedType()) {
- case Primitive::kPrimBoolean:
- case Primitive::kPrimByte:
- case Primitive::kPrimChar:
- case Primitive::kPrimShort:
- case Primitive::kPrimInt:
- case Primitive::kPrimLong:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
- : Location::RequiresRegister());
- locations->SetOut(Location::RequiresFpuRegister());
- break;
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
- : Location::RequiresFpuRegister());
- locations->SetOut(Location::RequiresFpuRegister());
- break;
- default:
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
- }
-}
-
-void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- VRegister dst = VRegisterFrom(locations->Out());
-
- DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
-
- // Zero out all other elements first.
- __ Movi(dst.V16B(), 0);
-
- // Shorthand for any type of zero.
- if (IsZeroBitPattern(instruction->InputAt(0))) {
- return;
- }
-
- // Set required elements.
- switch (instruction->GetPackedType()) {
- case Primitive::kPrimBoolean:
- case Primitive::kPrimByte:
- DCHECK_EQ(16u, instruction->GetVectorLength());
- __ Mov(dst.V16B(), 0, InputRegisterAt(instruction, 0));
- break;
- case Primitive::kPrimChar:
- case Primitive::kPrimShort:
- DCHECK_EQ(8u, instruction->GetVectorLength());
- __ Mov(dst.V8H(), 0, InputRegisterAt(instruction, 0));
- break;
- case Primitive::kPrimInt:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ Mov(dst.V4S(), 0, InputRegisterAt(instruction, 0));
- break;
- case Primitive::kPrimLong:
- DCHECK_EQ(2u, instruction->GetVectorLength());
- __ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0));
- break;
- default:
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
- }
-}
-
void LocationsBuilderARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
switch (instr->GetPackedType()) {
diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc
index 7a11dff41e..527691d9d9 100644
--- a/compiler/optimizing/code_generator_vector_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc
@@ -73,11 +73,19 @@ void InstructionCodeGeneratorARMVIXL::VisitVecReplicateScalar(HVecReplicateScala
}
}
-void LocationsBuilderARMVIXL::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+void LocationsBuilderARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
-void InstructionCodeGeneratorARMVIXL::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+void LocationsBuilderARMVIXL::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecSumReduce(HVecSumReduce* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
@@ -104,14 +112,6 @@ static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* in
}
}
-void LocationsBuilderARMVIXL::VisitVecReduce(HVecReduce* instruction) {
- CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
-}
-
-void InstructionCodeGeneratorARMVIXL::VisitVecReduce(HVecReduce* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
void LocationsBuilderARMVIXL::VisitVecCnv(HVecCnv* instruction) {
CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -621,14 +621,6 @@ void InstructionCodeGeneratorARMVIXL::VisitVecUShr(HVecUShr* instruction) {
}
}
-void LocationsBuilderARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
-void InstructionCodeGeneratorARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
void LocationsBuilderARMVIXL::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
LOG(FATAL) << "No SIMD for " << instr->GetId();
}
diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc
index c2fbf7f04b..6bf28ab1a3 100644
--- a/compiler/optimizing/code_generator_vector_mips.cc
+++ b/compiler/optimizing/code_generator_vector_mips.cc
@@ -88,11 +88,19 @@ void InstructionCodeGeneratorMIPS::VisitVecReplicateScalar(HVecReplicateScalar*
}
}
-void LocationsBuilderMIPS::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+void LocationsBuilderMIPS::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecSetScalars(HVecSetScalars* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
-void InstructionCodeGeneratorMIPS::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+void LocationsBuilderMIPS::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecSumReduce(HVecSumReduce* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
@@ -125,14 +133,6 @@ static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* in
}
}
-void LocationsBuilderMIPS::VisitVecReduce(HVecReduce* instruction) {
- CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
-}
-
-void InstructionCodeGeneratorMIPS::VisitVecReduce(HVecReduce* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
void LocationsBuilderMIPS::VisitVecCnv(HVecCnv* instruction) {
CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -818,14 +818,6 @@ void InstructionCodeGeneratorMIPS::VisitVecUShr(HVecUShr* instruction) {
}
}
-void LocationsBuilderMIPS::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
-void InstructionCodeGeneratorMIPS::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
void LocationsBuilderMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
switch (instr->GetPackedType()) {
diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc
index 9d3a777c13..75bf7a7cbb 100644
--- a/compiler/optimizing/code_generator_vector_mips64.cc
+++ b/compiler/optimizing/code_generator_vector_mips64.cc
@@ -91,11 +91,19 @@ void InstructionCodeGeneratorMIPS64::VisitVecReplicateScalar(HVecReplicateScalar
}
}
-void LocationsBuilderMIPS64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+void LocationsBuilderMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
-void InstructionCodeGeneratorMIPS64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+void LocationsBuilderMIPS64::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecSumReduce(HVecSumReduce* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
@@ -128,14 +136,6 @@ static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* in
}
}
-void LocationsBuilderMIPS64::VisitVecReduce(HVecReduce* instruction) {
- CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
-}
-
-void InstructionCodeGeneratorMIPS64::VisitVecReduce(HVecReduce* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
void LocationsBuilderMIPS64::VisitVecCnv(HVecCnv* instruction) {
CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -822,14 +822,6 @@ void InstructionCodeGeneratorMIPS64::VisitVecUShr(HVecUShr* instruction) {
}
}
-void LocationsBuilderMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
-void InstructionCodeGeneratorMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
void LocationsBuilderMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
switch (instr->GetPackedType()) {
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index 57e7dc6eed..e7aec76aff 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -27,28 +27,22 @@ namespace x86 {
void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
- HInstruction* input = instruction->InputAt(0);
- bool is_zero = IsZeroBitPattern(input);
switch (instruction->GetPackedType()) {
case Primitive::kPrimLong:
- // Long needs extra temporary to load from the register pair.
- if (!is_zero) {
- locations->AddTemp(Location::RequiresFpuRegister());
- }
+ // Long needs extra temporary to load the register pair.
+ locations->AddTemp(Location::RequiresFpuRegister());
FALLTHROUGH_INTENDED;
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
case Primitive::kPrimChar:
case Primitive::kPrimShort:
case Primitive::kPrimInt:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
- : Location::RequiresRegister());
+ locations->SetInAt(0, Location::RequiresRegister());
locations->SetOut(Location::RequiresFpuRegister());
break;
case Primitive::kPrimFloat:
case Primitive::kPrimDouble:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
- : Location::RequiresFpuRegister());
+ locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetOut(Location::SameAsFirstInput());
break;
default:
@@ -59,53 +53,46 @@ void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instructi
void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
LocationSummary* locations = instruction->GetLocations();
- XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
-
- // Shorthand for any type of zero.
- if (IsZeroBitPattern(instruction->InputAt(0))) {
- __ xorps(dst, dst);
- return;
- }
-
+ XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
switch (instruction->GetPackedType()) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
DCHECK_EQ(16u, instruction->GetVectorLength());
- __ movd(dst, locations->InAt(0).AsRegister<Register>());
- __ punpcklbw(dst, dst);
- __ punpcklwd(dst, dst);
- __ pshufd(dst, dst, Immediate(0));
+ __ movd(reg, locations->InAt(0).AsRegister<Register>());
+ __ punpcklbw(reg, reg);
+ __ punpcklwd(reg, reg);
+ __ pshufd(reg, reg, Immediate(0));
break;
case Primitive::kPrimChar:
case Primitive::kPrimShort:
DCHECK_EQ(8u, instruction->GetVectorLength());
- __ movd(dst, locations->InAt(0).AsRegister<Register>());
- __ punpcklwd(dst, dst);
- __ pshufd(dst, dst, Immediate(0));
+ __ movd(reg, locations->InAt(0).AsRegister<Register>());
+ __ punpcklwd(reg, reg);
+ __ pshufd(reg, reg, Immediate(0));
break;
case Primitive::kPrimInt:
DCHECK_EQ(4u, instruction->GetVectorLength());
- __ movd(dst, locations->InAt(0).AsRegister<Register>());
- __ pshufd(dst, dst, Immediate(0));
+ __ movd(reg, locations->InAt(0).AsRegister<Register>());
+ __ pshufd(reg, reg, Immediate(0));
break;
case Primitive::kPrimLong: {
XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ movd(dst, locations->InAt(0).AsRegisterPairLow<Register>());
+ __ movd(reg, locations->InAt(0).AsRegisterPairLow<Register>());
__ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>());
- __ punpckldq(dst, tmp);
- __ punpcklqdq(dst, dst);
+ __ punpckldq(reg, tmp);
+ __ punpcklqdq(reg, reg);
break;
}
case Primitive::kPrimFloat:
DCHECK(locations->InAt(0).Equals(locations->Out()));
DCHECK_EQ(4u, instruction->GetVectorLength());
- __ shufps(dst, dst, Immediate(0));
+ __ shufps(reg, reg, Immediate(0));
break;
case Primitive::kPrimDouble:
DCHECK(locations->InAt(0).Equals(locations->Out()));
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ shufpd(dst, dst, Immediate(0));
+ __ shufpd(reg, reg, Immediate(0));
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -113,65 +100,20 @@ void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* i
}
}
-void LocationsBuilderX86::VisitVecExtractScalar(HVecExtractScalar* instruction) {
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
- switch (instruction->GetPackedType()) {
- case Primitive::kPrimLong:
- // Long needs extra temporary to store into the register pair.
- locations->AddTemp(Location::RequiresFpuRegister());
- FALLTHROUGH_INTENDED;
- case Primitive::kPrimBoolean:
- case Primitive::kPrimByte:
- case Primitive::kPrimChar:
- case Primitive::kPrimShort:
- case Primitive::kPrimInt:
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetOut(Location::RequiresRegister());
- break;
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetOut(Location::SameAsFirstInput());
- break;
- default:
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
- }
+void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
-void InstructionCodeGeneratorX86::VisitVecExtractScalar(HVecExtractScalar* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
- switch (instruction->GetPackedType()) {
- case Primitive::kPrimBoolean:
- case Primitive::kPrimByte:
- case Primitive::kPrimChar:
- case Primitive::kPrimShort: // TODO: up to here, and?
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
- case Primitive::kPrimInt:
- DCHECK_LE(4u, instruction->GetVectorLength());
- DCHECK_LE(instruction->GetVectorLength(), 16u);
- __ movd(locations->Out().AsRegister<Register>(), src);
- break;
- case Primitive::kPrimLong: {
- XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- DCHECK_EQ(2u, instruction->GetVectorLength());
- __ movd(locations->Out().AsRegisterPairLow<Register>(), src);
- __ pshufd(tmp, src, Immediate(1));
- __ movd(locations->Out().AsRegisterPairHigh<Register>(), tmp);
- break;
- }
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- DCHECK_LE(2u, instruction->GetVectorLength());
- DCHECK_LE(instruction->GetVectorLength(), 4u);
- DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required
- break;
- default:
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
- }
+void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderX86::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorX86::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
// Helper to set up locations for vector unary operations.
@@ -195,73 +137,6 @@ static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* in
}
}
-void LocationsBuilderX86::VisitVecReduce(HVecReduce* instruction) {
- CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
- // Long reduction or min/max require a temporary.
- if (instruction->GetPackedType() == Primitive::kPrimLong ||
- instruction->GetKind() == HVecReduce::kMin ||
- instruction->GetKind() == HVecReduce::kMax) {
- instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
- }
-}
-
-void InstructionCodeGeneratorX86::VisitVecReduce(HVecReduce* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
- XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
- switch (instruction->GetPackedType()) {
- case Primitive::kPrimInt:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- switch (instruction->GetKind()) {
- case HVecReduce::kSum:
- __ movaps(dst, src);
- __ phaddd(dst, dst);
- __ phaddd(dst, dst);
- break;
- case HVecReduce::kMin: {
- XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- __ movaps(tmp, src);
- __ movaps(dst, src);
- __ psrldq(tmp, Immediate(8));
- __ pminsd(dst, tmp);
- __ psrldq(tmp, Immediate(4));
- __ pminsd(dst, tmp);
- break;
- }
- case HVecReduce::kMax: {
- XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- __ movaps(tmp, src);
- __ movaps(dst, src);
- __ psrldq(tmp, Immediate(8));
- __ pmaxsd(dst, tmp);
- __ psrldq(tmp, Immediate(4));
- __ pmaxsd(dst, tmp);
- break;
- }
- }
- break;
- case Primitive::kPrimLong: {
- DCHECK_EQ(2u, instruction->GetVectorLength());
- XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- switch (instruction->GetKind()) {
- case HVecReduce::kSum:
- __ movaps(tmp, src);
- __ movaps(dst, src);
- __ punpckhqdq(tmp, tmp);
- __ paddq(dst, tmp);
- break;
- case HVecReduce::kMin:
- case HVecReduce::kMax:
- LOG(FATAL) << "Unsupported SIMD type";
- }
- break;
- }
- default:
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
- }
-}
-
void LocationsBuilderX86::VisitVecCnv(HVecCnv* instruction) {
CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -946,91 +821,6 @@ void InstructionCodeGeneratorX86::VisitVecUShr(HVecUShr* instruction) {
}
}
-void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) {
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
-
- DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
-
- HInstruction* input = instruction->InputAt(0);
- bool is_zero = IsZeroBitPattern(input);
-
- switch (instruction->GetPackedType()) {
- case Primitive::kPrimLong:
- // Long needs extra temporary to load from register pairs.
- if (!is_zero) {
- locations->AddTemp(Location::RequiresFpuRegister());
- }
- FALLTHROUGH_INTENDED;
- case Primitive::kPrimBoolean:
- case Primitive::kPrimByte:
- case Primitive::kPrimChar:
- case Primitive::kPrimShort:
- case Primitive::kPrimInt:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
- : Location::RequiresRegister());
- locations->SetOut(Location::RequiresFpuRegister());
- break;
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
- : Location::RequiresFpuRegister());
- locations->SetOut(Location::RequiresFpuRegister());
- break;
- default:
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
- }
-}
-
-void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
-
- DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
-
- // Zero out all other elements first.
- __ xorps(dst, dst);
-
- // Shorthand for any type of zero.
- if (IsZeroBitPattern(instruction->InputAt(0))) {
- return;
- }
-
- // Set required elements.
- switch (instruction->GetPackedType()) {
- case Primitive::kPrimBoolean:
- case Primitive::kPrimByte:
- case Primitive::kPrimChar:
- case Primitive::kPrimShort: // TODO: up to here, and?
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
- case Primitive::kPrimInt:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ movd(dst, locations->InAt(0).AsRegister<Register>());
- break;
- case Primitive::kPrimLong: {
- XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- DCHECK_EQ(2u, instruction->GetVectorLength());
- __ xorps(tmp, tmp);
- __ movd(dst, locations->InAt(0).AsRegisterPairLow<Register>());
- __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>());
- __ punpckldq(dst, tmp);
- break;
- }
- case Primitive::kPrimFloat:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ movss(dst, locations->InAt(1).AsFpuRegister<XmmRegister>());
- break;
- case Primitive::kPrimDouble:
- DCHECK_EQ(2u, instruction->GetVectorLength());
- __ movsd(dst, locations->InAt(1).AsFpuRegister<XmmRegister>());
- break;
- default:
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
- }
-}
-
void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
LOG(FATAL) << "No SIMD for " << instr->GetId();
}
@@ -1078,7 +868,6 @@ static Address VecAddress(LocationSummary* locations, size_t size, bool is_strin
case 8: scale = TIMES_8; break;
default: break;
}
- // Incorporate the string or array offset in the address computation.
uint32_t offset = is_string_char_at
? mirror::String::ValueOffset().Uint32Value()
: mirror::Array::DataOffset(size).Uint32Value();
@@ -1113,7 +902,7 @@ void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) {
__ testb(Address(locations->InAt(0).AsRegister<Register>(), count_offset), Immediate(1));
__ j(kNotZero, &not_compressed);
// Zero extend 8 compressed bytes into 8 chars.
- __ movsd(reg, VecAddress(locations, 1, instruction->IsStringCharAt()));
+ __ movsd(reg, VecAddress(locations, 1, /*is_string_char_at*/ true));
__ pxor(tmp, tmp);
__ punpcklbw(reg, tmp);
__ jmp(&done);
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index efab0db118..c7ee81c60d 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -27,8 +27,6 @@ namespace x86_64 {
void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
- HInstruction* input = instruction->InputAt(0);
- bool is_zero = IsZeroBitPattern(input);
switch (instruction->GetPackedType()) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
@@ -36,14 +34,12 @@ void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instru
case Primitive::kPrimShort:
case Primitive::kPrimInt:
case Primitive::kPrimLong:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
- : Location::RequiresRegister());
+ locations->SetInAt(0, Location::RequiresRegister());
locations->SetOut(Location::RequiresFpuRegister());
break;
case Primitive::kPrimFloat:
case Primitive::kPrimDouble:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
- : Location::RequiresFpuRegister());
+ locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetOut(Location::SameAsFirstInput());
break;
default:
@@ -54,49 +50,42 @@ void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instru
void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
LocationSummary* locations = instruction->GetLocations();
- XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
-
- // Shorthand for any type of zero.
- if (IsZeroBitPattern(instruction->InputAt(0))) {
- __ xorps(dst, dst);
- return;
- }
-
+ XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
switch (instruction->GetPackedType()) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
DCHECK_EQ(16u, instruction->GetVectorLength());
- __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>());
- __ punpcklbw(dst, dst);
- __ punpcklwd(dst, dst);
- __ pshufd(dst, dst, Immediate(0));
+ __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>());
+ __ punpcklbw(reg, reg);
+ __ punpcklwd(reg, reg);
+ __ pshufd(reg, reg, Immediate(0));
break;
case Primitive::kPrimChar:
case Primitive::kPrimShort:
DCHECK_EQ(8u, instruction->GetVectorLength());
- __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>());
- __ punpcklwd(dst, dst);
- __ pshufd(dst, dst, Immediate(0));
+ __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>());
+ __ punpcklwd(reg, reg);
+ __ pshufd(reg, reg, Immediate(0));
break;
case Primitive::kPrimInt:
DCHECK_EQ(4u, instruction->GetVectorLength());
- __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>());
- __ pshufd(dst, dst, Immediate(0));
+ __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>());
+ __ pshufd(reg, reg, Immediate(0));
break;
case Primitive::kPrimLong:
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>()); // is 64-bit
- __ punpcklqdq(dst, dst);
+ __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>()); // is 64-bit
+ __ punpcklqdq(reg, reg);
break;
case Primitive::kPrimFloat:
DCHECK(locations->InAt(0).Equals(locations->Out()));
DCHECK_EQ(4u, instruction->GetVectorLength());
- __ shufps(dst, dst, Immediate(0));
+ __ shufps(reg, reg, Immediate(0));
break;
case Primitive::kPrimDouble:
DCHECK(locations->InAt(0).Equals(locations->Out()));
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ shufpd(dst, dst, Immediate(0));
+ __ shufpd(reg, reg, Immediate(0));
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -104,57 +93,20 @@ void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar
}
}
-void LocationsBuilderX86_64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
- switch (instruction->GetPackedType()) {
- case Primitive::kPrimBoolean:
- case Primitive::kPrimByte:
- case Primitive::kPrimChar:
- case Primitive::kPrimShort:
- case Primitive::kPrimInt:
- case Primitive::kPrimLong:
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetOut(Location::RequiresRegister());
- break;
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetOut(Location::SameAsFirstInput());
- break;
- default:
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
- }
+void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
-void InstructionCodeGeneratorX86_64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
- switch (instruction->GetPackedType()) {
- case Primitive::kPrimBoolean:
- case Primitive::kPrimByte:
- case Primitive::kPrimChar:
- case Primitive::kPrimShort: // TODO: up to here, and?
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
- case Primitive::kPrimInt:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ movd(locations->Out().AsRegister<CpuRegister>(), src);
- break;
- case Primitive::kPrimLong:
- DCHECK_EQ(2u, instruction->GetVectorLength());
- __ movd(locations->Out().AsRegister<CpuRegister>(), src); // is 64-bit
- break;
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- DCHECK_LE(2u, instruction->GetVectorLength());
- DCHECK_LE(instruction->GetVectorLength(), 4u);
- DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required
- break;
- default:
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
- }
+void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderX86_64::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
// Helper to set up locations for vector unary operations.
@@ -178,73 +130,6 @@ static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* in
}
}
-void LocationsBuilderX86_64::VisitVecReduce(HVecReduce* instruction) {
- CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
- // Long reduction or min/max require a temporary.
- if (instruction->GetPackedType() == Primitive::kPrimLong ||
- instruction->GetKind() == HVecReduce::kMin ||
- instruction->GetKind() == HVecReduce::kMax) {
- instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
- }
-}
-
-void InstructionCodeGeneratorX86_64::VisitVecReduce(HVecReduce* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
- XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
- switch (instruction->GetPackedType()) {
- case Primitive::kPrimInt:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- switch (instruction->GetKind()) {
- case HVecReduce::kSum:
- __ movaps(dst, src);
- __ phaddd(dst, dst);
- __ phaddd(dst, dst);
- break;
- case HVecReduce::kMin: {
- XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- __ movaps(tmp, src);
- __ movaps(dst, src);
- __ psrldq(tmp, Immediate(8));
- __ pminsd(dst, tmp);
- __ psrldq(tmp, Immediate(4));
- __ pminsd(dst, tmp);
- break;
- }
- case HVecReduce::kMax: {
- XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- __ movaps(tmp, src);
- __ movaps(dst, src);
- __ psrldq(tmp, Immediate(8));
- __ pmaxsd(dst, tmp);
- __ psrldq(tmp, Immediate(4));
- __ pmaxsd(dst, tmp);
- break;
- }
- }
- break;
- case Primitive::kPrimLong: {
- DCHECK_EQ(2u, instruction->GetVectorLength());
- XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- switch (instruction->GetKind()) {
- case HVecReduce::kSum:
- __ movaps(tmp, src);
- __ movaps(dst, src);
- __ punpckhqdq(tmp, tmp);
- __ paddq(dst, tmp);
- break;
- case HVecReduce::kMin:
- case HVecReduce::kMax:
- LOG(FATAL) << "Unsupported SIMD type";
- }
- break;
- }
- default:
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
- }
-}
-
void LocationsBuilderX86_64::VisitVecCnv(HVecCnv* instruction) {
CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -929,81 +814,6 @@ void InstructionCodeGeneratorX86_64::VisitVecUShr(HVecUShr* instruction) {
}
}
-void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
-
- DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
-
- HInstruction* input = instruction->InputAt(0);
- bool is_zero = IsZeroBitPattern(input);
-
- switch (instruction->GetPackedType()) {
- case Primitive::kPrimBoolean:
- case Primitive::kPrimByte:
- case Primitive::kPrimChar:
- case Primitive::kPrimShort:
- case Primitive::kPrimInt:
- case Primitive::kPrimLong:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
- : Location::RequiresRegister());
- locations->SetOut(Location::RequiresFpuRegister());
- break;
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
- : Location::RequiresFpuRegister());
- locations->SetOut(Location::RequiresFpuRegister());
- break;
- default:
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
- }
-}
-
-void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
-
- DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
-
- // Zero out all other elements first.
- __ xorps(dst, dst);
-
- // Shorthand for any type of zero.
- if (IsZeroBitPattern(instruction->InputAt(0))) {
- return;
- }
-
- // Set required elements.
- switch (instruction->GetPackedType()) {
- case Primitive::kPrimBoolean:
- case Primitive::kPrimByte:
- case Primitive::kPrimChar:
- case Primitive::kPrimShort: // TODO: up to here, and?
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
- case Primitive::kPrimInt:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>());
- break;
- case Primitive::kPrimLong:
- DCHECK_EQ(2u, instruction->GetVectorLength());
- __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>()); // is 64-bit
- break;
- case Primitive::kPrimFloat:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ movss(dst, locations->InAt(0).AsFpuRegister<XmmRegister>());
- break;
- case Primitive::kPrimDouble:
- DCHECK_EQ(2u, instruction->GetVectorLength());
- __ movsd(dst, locations->InAt(0).AsFpuRegister<XmmRegister>());
- break;
- default:
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
- }
-}
-
void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
LOG(FATAL) << "No SIMD for " << instr->GetId();
}
@@ -1051,7 +861,6 @@ static Address VecAddress(LocationSummary* locations, size_t size, bool is_strin
case 8: scale = TIMES_8; break;
default: break;
}
- // Incorporate the string or array offset in the address computation.
uint32_t offset = is_string_char_at
? mirror::String::ValueOffset().Uint32Value()
: mirror::Array::DataOffset(size).Uint32Value();
@@ -1086,7 +895,7 @@ void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) {
__ testb(Address(locations->InAt(0).AsRegister<CpuRegister>(), count_offset), Immediate(1));
__ j(kNotZero, &not_compressed);
// Zero extend 8 compressed bytes into 8 chars.
- __ movsd(reg, VecAddress(locations, 1, instruction->IsStringCharAt()));
+ __ movsd(reg, VecAddress(locations, 1, /*is_string_char_at*/ true));
__ pxor(tmp, tmp);
__ punpcklbw(reg, tmp);
__ jmp(&done);
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index f8f4eb2ae3..027ba7741c 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -285,19 +285,6 @@ static bool HasReductionFormat(HInstruction* reduction, HInstruction* phi) {
return false;
}
-// Translates operation to reduction kind.
-static HVecReduce::ReductionKind GetReductionKind(HInstruction* reduction) {
- if (reduction->IsVecAdd() || reduction->IsVecSub()) {
- return HVecReduce::kSum;
- } else if (reduction->IsVecMin()) {
- return HVecReduce::kMin;
- } else if (reduction->IsVecMax()) {
- return HVecReduce::kMax;
- }
- LOG(FATAL) << "Unsupported SIMD reduction";
- UNREACHABLE();
-}
-
// Test vector restrictions.
static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) {
return (restrictions & tested) != 0;
@@ -347,8 +334,7 @@ HLoopOptimization::HLoopOptimization(HGraph* graph,
vector_peeling_candidate_(nullptr),
vector_runtime_test_a_(nullptr),
vector_runtime_test_b_(nullptr),
- vector_map_(nullptr),
- vector_permanent_map_(nullptr) {
+ vector_map_(nullptr) {
}
void HLoopOptimization::Run() {
@@ -402,14 +388,11 @@ void HLoopOptimization::LocalRun() {
ArenaSet<ArrayReference> refs(loop_allocator_->Adapter(kArenaAllocLoopOptimization));
ArenaSafeMap<HInstruction*, HInstruction*> map(
std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization));
- ArenaSafeMap<HInstruction*, HInstruction*> perm(
- std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization));
// Attach.
iset_ = &iset;
reductions_ = &reds;
vector_refs_ = &refs;
vector_map_ = &map;
- vector_permanent_map_ = &perm;
// Traverse.
TraverseLoopsInnerToOuter(top_loop_);
// Detach.
@@ -417,7 +400,6 @@ void HLoopOptimization::LocalRun() {
reductions_ = nullptr;
vector_refs_ = nullptr;
vector_map_ = nullptr;
- vector_permanent_map_ = nullptr;
}
}
@@ -621,6 +603,7 @@ bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {
// Vectorize loop, if possible and valid.
if (kEnableVectorization &&
TrySetSimpleLoopHeader(header, &main_phi) &&
+ reductions_->empty() && // TODO: possible with some effort
ShouldVectorize(node, body, trip_count) &&
TryAssignLastValue(node->loop_info, main_phi, preheader, /*collect_loop_uses*/ true)) {
Vectorize(node, body, exit, trip_count);
@@ -819,13 +802,6 @@ void HLoopOptimization::Vectorize(LoopNode* node,
/*unroll*/ 1);
}
- // Link reductions to their final uses.
- for (auto i = reductions_->begin(); i != reductions_->end(); ++i) {
- if (i->first->IsPhi()) {
- i->first->ReplaceWith(ReduceAndExtractIfNeeded(i->second));
- }
- }
-
// Remove the original loop by disconnecting the body block
// and removing all instructions from the header.
block->DisconnectAndDelete();
@@ -865,10 +841,21 @@ void HLoopOptimization::GenerateNewLoop(LoopNode* node,
vector_header_->AddInstruction(cond);
vector_header_->AddInstruction(new (global_allocator_) HIf(cond));
vector_index_ = phi;
- vector_permanent_map_->clear(); // preserved over unrolling
for (uint32_t u = 0; u < unroll; u++) {
+ // Clear map, leaving loop invariants setup during unrolling.
+ if (u == 0) {
+ vector_map_->clear();
+ } else {
+ for (auto i = vector_map_->begin(); i != vector_map_->end(); ) {
+ if (i->second->IsVecReplicateScalar()) {
+ DCHECK(node->loop_info->IsDefinedOutOfTheLoop(i->first));
+ ++i;
+ } else {
+ i = vector_map_->erase(i);
+ }
+ }
+ }
// Generate instruction map.
- vector_map_->clear();
for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
bool vectorized_def = VectorizeDef(node, it.Current(), /*generate_code*/ true);
DCHECK(vectorized_def);
@@ -885,17 +872,9 @@ void HLoopOptimization::GenerateNewLoop(LoopNode* node,
}
}
}
- // Generate the induction.
vector_index_ = new (global_allocator_) HAdd(induc_type, vector_index_, step);
Insert(vector_body_, vector_index_);
}
- // Finalize phi inputs for the reductions (if any).
- for (auto i = reductions_->begin(); i != reductions_->end(); ++i) {
- if (!i->first->IsPhi()) {
- DCHECK(i->second->IsPhi());
- GenerateVecReductionPhiInputs(i->second->AsPhi(), i->first);
- }
- }
// Finalize phi inputs for the loop index.
phi->AddInput(lo);
phi->AddInput(vector_index_);
@@ -931,23 +910,6 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node,
}
return false;
}
- // Accept a left-hand-side reduction for
- // (1) supported vector type,
- // (2) vectorizable right-hand-side value.
- auto redit = reductions_->find(instruction);
- if (redit != reductions_->end()) {
- Primitive::Type type = instruction->GetType();
- if (TrySetVectorType(type, &restrictions) &&
- VectorizeUse(node, instruction, generate_code, type, restrictions)) {
- if (generate_code) {
- HInstruction* new_red = vector_map_->Get(instruction);
- vector_permanent_map_->Put(new_red, vector_map_->Get(redit->second));
- vector_permanent_map_->Overwrite(redit->second, new_red);
- }
- return true;
- }
- return false;
- }
// Branch back okay.
if (instruction->IsGoto()) {
return true;
@@ -1003,21 +965,6 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node,
}
return true;
}
- } else if (instruction->IsPhi()) {
- // Accept particular phi operations.
- if (reductions_->find(instruction) != reductions_->end()) {
- // Deal with vector restrictions.
- if (HasVectorRestrictions(restrictions, kNoReduction)) {
- return false;
- }
- // Accept a reduction.
- if (generate_code) {
- GenerateVecReductionPhi(instruction->AsPhi());
- }
- return true;
- }
- // TODO: accept right-hand-side induction?
- return false;
} else if (instruction->IsTypeConversion()) {
// Accept particular type conversions.
HTypeConversion* conversion = instruction->AsTypeConversion();
@@ -1208,14 +1155,14 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(8);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction;
+ *restrictions |= kNoDiv | kNoStringCharAt;
return TrySetVectorLength(4);
case Primitive::kPrimInt:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(2);
default:
break;
@@ -1227,11 +1174,11 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(16);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(8);
case Primitive::kPrimInt:
*restrictions |= kNoDiv;
@@ -1240,10 +1187,8 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric
*restrictions |= kNoDiv | kNoMul | kNoMinMax;
return TrySetVectorLength(2);
case Primitive::kPrimFloat:
- *restrictions |= kNoReduction;
return TrySetVectorLength(4);
case Primitive::kPrimDouble:
- *restrictions |= kNoReduction;
return TrySetVectorLength(2);
default:
return false;
@@ -1255,12 +1200,11 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |=
- kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoReduction;
+ *restrictions |= kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd;
return TrySetVectorLength(16);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoReduction;
+ *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd;
return TrySetVectorLength(8);
case Primitive::kPrimInt:
*restrictions |= kNoDiv;
@@ -1269,10 +1213,10 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric
*restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs | kNoMinMax;
return TrySetVectorLength(2);
case Primitive::kPrimFloat:
- *restrictions |= kNoMinMax | kNoReduction; // minmax: -0.0 vs +0.0
+ *restrictions |= kNoMinMax; // -0.0 vs +0.0
return TrySetVectorLength(4);
case Primitive::kPrimDouble:
- *restrictions |= kNoMinMax | kNoReduction; // minmax: -0.0 vs +0.0
+ *restrictions |= kNoMinMax; // -0.0 vs +0.0
return TrySetVectorLength(2);
default:
break;
@@ -1284,23 +1228,23 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(16);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction;
+ *restrictions |= kNoDiv | kNoStringCharAt;
return TrySetVectorLength(8);
case Primitive::kPrimInt:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(4);
case Primitive::kPrimLong:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(2);
case Primitive::kPrimFloat:
- *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN)
+ *restrictions |= kNoMinMax; // min/max(x, NaN)
return TrySetVectorLength(4);
case Primitive::kPrimDouble:
- *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN)
+ *restrictions |= kNoMinMax; // min/max(x, NaN)
return TrySetVectorLength(2);
default:
break;
@@ -1312,23 +1256,23 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(16);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction;
+ *restrictions |= kNoDiv | kNoStringCharAt;
return TrySetVectorLength(8);
case Primitive::kPrimInt:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(4);
case Primitive::kPrimLong:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(2);
case Primitive::kPrimFloat:
- *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN)
+ *restrictions |= kNoMinMax; // min/max(x, NaN)
return TrySetVectorLength(4);
case Primitive::kPrimDouble:
- *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN)
+ *restrictions |= kNoMinMax; // min/max(x, NaN)
return TrySetVectorLength(2);
default:
break;
@@ -1361,16 +1305,9 @@ void HLoopOptimization::GenerateVecInv(HInstruction* org, Primitive::Type type)
return;
}
// In vector code, explicit scalar expansion is needed.
- HInstruction* vector = nullptr;
- auto it = vector_permanent_map_->find(org);
- if (it != vector_permanent_map_->end()) {
- vector = it->second; // reuse during unrolling
- } else {
- vector = new (global_allocator_) HVecReplicateScalar(
- global_allocator_, org, type, vector_length_);
- vector_permanent_map_->Put(org, Insert(vector_preheader_, vector));
- }
- vector_map_->Put(org, vector);
+ HInstruction* vector = new (global_allocator_) HVecReplicateScalar(
+ global_allocator_, org, type, vector_length_);
+ vector_map_->Put(org, Insert(vector_preheader_, vector));
}
}
@@ -1425,78 +1362,6 @@ void HLoopOptimization::GenerateVecMem(HInstruction* org,
vector_map_->Put(org, vector);
}
-void HLoopOptimization::GenerateVecReductionPhi(HPhi* phi) {
- DCHECK(reductions_->find(phi) != reductions_->end());
- DCHECK(reductions_->Get(phi->InputAt(1)) == phi);
- HInstruction* vector = nullptr;
- if (vector_mode_ == kSequential) {
- HPhi* new_phi = new (global_allocator_) HPhi(
- global_allocator_, kNoRegNumber, 0, phi->GetType());
- vector_header_->AddPhi(new_phi);
- vector = new_phi;
- } else {
- // Link vector reduction back to prior unrolled update, or a first phi.
- auto it = vector_permanent_map_->find(phi);
- if (it != vector_permanent_map_->end()) {
- vector = it->second;
- } else {
- HPhi* new_phi = new (global_allocator_) HPhi(
- global_allocator_, kNoRegNumber, 0, HVecOperation::kSIMDType);
- vector_header_->AddPhi(new_phi);
- vector = new_phi;
- }
- }
- vector_map_->Put(phi, vector);
-}
-
-void HLoopOptimization::GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* reduction) {
- HInstruction* new_phi = vector_map_->Get(phi);
- HInstruction* new_init = reductions_->Get(phi);
- HInstruction* new_red = vector_map_->Get(reduction);
- // Link unrolled vector loop back to new phi.
- for (; !new_phi->IsPhi(); new_phi = vector_permanent_map_->Get(new_phi)) {
- DCHECK(new_phi->IsVecOperation());
- }
- // Prepare the new initialization.
- if (vector_mode_ == kVector) {
- // Generate a [initial, 0, .., 0] vector.
- new_init = Insert(
- vector_preheader_,
- new (global_allocator_) HVecSetScalars(
- global_allocator_, &new_init, phi->GetType(), vector_length_, 1));
- } else {
- new_init = ReduceAndExtractIfNeeded(new_init);
- }
- // Set the phi inputs.
- DCHECK(new_phi->IsPhi());
- new_phi->AsPhi()->AddInput(new_init);
- new_phi->AsPhi()->AddInput(new_red);
- // New feed value for next phi (safe mutation in iteration).
- reductions_->find(phi)->second = new_phi;
-}
-
-HInstruction* HLoopOptimization::ReduceAndExtractIfNeeded(HInstruction* instruction) {
- if (instruction->IsPhi()) {
- HInstruction* input = instruction->InputAt(1);
- if (input->IsVecOperation()) {
- Primitive::Type type = input->AsVecOperation()->GetPackedType();
- HBasicBlock* exit = instruction->GetBlock()->GetSuccessors()[0];
- // Generate a vector reduction and scalar extract
- // x = REDUCE( [x_1, .., x_n] )
- // y = x_1
- // along the exit of the defining loop.
- HVecReduce::ReductionKind kind = GetReductionKind(input);
- HInstruction* reduce = new (global_allocator_) HVecReduce(
- global_allocator_, instruction, type, vector_length_, kind);
- exit->InsertInstructionBefore(reduce, exit->GetFirstInstruction());
- instruction = new (global_allocator_) HVecExtractScalar(
- global_allocator_, reduce, type, vector_length_, 0);
- exit->InsertInstructionAfter(instruction, reduce);
- }
- }
- return instruction;
-}
-
#define GENERATE_VEC(x, y) \
if (vector_mode_ == kVector) { \
vector = (x); \
@@ -1677,9 +1542,10 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node,
// Test for top level arithmetic shift right x >> 1 or logical shift right x >>> 1
// (note whether the sign bit in wider precision is shifted in has no effect
// on the narrow precision computed by the idiom).
+ int64_t distance = 0;
if ((instruction->IsShr() ||
instruction->IsUShr()) &&
- IsInt64Value(instruction->InputAt(1), 1)) {
+ IsInt64AndGet(instruction->InputAt(1), /*out*/ &distance) && distance == 1) {
// Test for (a + b + c) >> 1 for optional constant c.
HInstruction* a = nullptr;
HInstruction* b = nullptr;
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index ba9126c5f6..49be8a3fb4 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -62,18 +62,17 @@ class HLoopOptimization : public HOptimization {
* Vectorization restrictions (bit mask).
*/
enum VectorRestrictions {
- kNone = 0, // no restrictions
- kNoMul = 1 << 0, // no multiplication
- kNoDiv = 1 << 1, // no division
- kNoShift = 1 << 2, // no shift
- kNoShr = 1 << 3, // no arithmetic shift right
- kNoHiBits = 1 << 4, // "wider" operations cannot bring in higher order bits
- kNoSignedHAdd = 1 << 5, // no signed halving add
- kNoUnroundedHAdd = 1 << 6, // no unrounded halving add
- kNoAbs = 1 << 7, // no absolute value
- kNoMinMax = 1 << 8, // no min/max
- kNoStringCharAt = 1 << 9, // no StringCharAt
- kNoReduction = 1 << 10, // no reduction
+ kNone = 0, // no restrictions
+ kNoMul = 1, // no multiplication
+ kNoDiv = 2, // no division
+ kNoShift = 4, // no shift
+ kNoShr = 8, // no arithmetic shift right
+ kNoHiBits = 16, // "wider" operations cannot bring in higher order bits
+ kNoSignedHAdd = 32, // no signed halving add
+ kNoUnroundedHAdd = 64, // no unrounded halving add
+ kNoAbs = 128, // no absolute value
+ kNoMinMax = 256, // no min/max
+ kNoStringCharAt = 512, // no StringCharAt
};
/*
@@ -156,9 +155,6 @@ class HLoopOptimization : public HOptimization {
HInstruction* opb,
HInstruction* offset,
Primitive::Type type);
- void GenerateVecReductionPhi(HPhi* phi);
- void GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* reduction);
- HInstruction* ReduceAndExtractIfNeeded(HInstruction* instruction);
void GenerateVecOp(HInstruction* org,
HInstruction* opa,
HInstruction* opb,
@@ -257,10 +253,6 @@ class HLoopOptimization : public HOptimization {
// Contents reside in phase-local heap memory.
ArenaSafeMap<HInstruction*, HInstruction*>* vector_map_;
- // Permanent mapping used during vectorization synthesis.
- // Contents reside in phase-local heap memory.
- ArenaSafeMap<HInstruction*, HInstruction*>* vector_permanent_map_;
-
// Temporary vectorization bookkeeping.
VectorMode vector_mode_; // synthesis mode
HBasicBlock* vector_preheader_; // preheader of the new loop
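Both spellings of VectorRestrictions denote the same single-bit values (1 << 3 and 8 are the same bit), so existing mask tests are unaffected. A hedged sketch of how such a mask is composed and queried (helper name illustrative):

  #include <cstdint>

  // Restrictions compose with | and are tested with &; each enumerator
  // occupies exactly one bit.
  inline bool HasRestriction(uint64_t restrictions, uint64_t r) {
    return (restrictions & r) != 0;
  }

  // For a target without multiply or shift support:
  //   uint64_t r = kNoMul | kNoShift;    // 1 | 4
  //   HasRestriction(r, kNoMul);         // true
  //   HasRestriction(r, kNoDiv);         // false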
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 869fdd4182..f60d532c37 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1374,8 +1374,7 @@ class HLoopInformationOutwardIterator : public ValueObject {
M(UShr, BinaryOperation) \
M(Xor, BinaryOperation) \
M(VecReplicateScalar, VecUnaryOperation) \
- M(VecExtractScalar, VecUnaryOperation) \
- M(VecReduce, VecUnaryOperation) \
+ M(VecSumReduce, VecUnaryOperation) \
M(VecCnv, VecUnaryOperation) \
M(VecNeg, VecUnaryOperation) \
M(VecAbs, VecUnaryOperation) \
@@ -7031,17 +7030,6 @@ inline bool IsInt64AndGet(HInstruction* instruction, /*out*/ int64_t* value) {
return false;
}
-// Returns true iff instruction is the given integral constant.
-inline bool IsInt64Value(HInstruction* instruction, int64_t value) {
- int64_t val = 0;
- return IsInt64AndGet(instruction, &val) && val == value;
-}
-
-// Returns true iff instruction is a zero bit pattern.
-inline bool IsZeroBitPattern(HInstruction* instruction) {
- return instruction->IsConstant() && instruction->AsConstant()->IsZeroBitPattern();
-}
-
#define INSTRUCTION_TYPE_CHECK(type, super) \
inline bool HInstruction::Is##type() const { return GetKind() == k##type; } \
inline const H##type* HInstruction::As##type() const { \
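With IsInt64Value() and IsZeroBitPattern() deleted, call sites fall back to the surviving IsInt64AndGet() in its two-step form, exactly as the loop_optimization.cc hunk earlier in this change already does:

  // Reinstated call-site pattern: fetch the constant, then compare.
  int64_t value = 0;
  bool is_one = IsInt64AndGet(instruction, /*out*/ &value) && value == 1;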
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index 886d75e5c7..6261171a00 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -63,10 +63,6 @@ class Alignment {
// GetVectorLength() x GetPackedType() operations simultaneously.
class HVecOperation : public HVariableInputSizeInstruction {
public:
- // A SIMD operation looks like a FPU location.
- // TODO: we could introduce SIMD types in HIR.
- static constexpr Primitive::Type kSIMDType = Primitive::kPrimDouble;
-
HVecOperation(ArenaAllocator* arena,
Primitive::Type packed_type,
SideEffects side_effects,
@@ -93,9 +89,10 @@ class HVecOperation : public HVariableInputSizeInstruction {
return vector_length_ * Primitive::ComponentSize(GetPackedType());
}
- // Returns the type of the vector operation.
+ // Returns the type of the vector operation: a SIMD operation looks like a FPU location.
+ // TODO: we could introduce SIMD types in HIR.
Primitive::Type GetType() const OVERRIDE {
- return kSIMDType;
+ return Primitive::kPrimDouble;
}
// Returns the true component type packed in a vector.
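With the named kSIMDType constant gone, the contract itself is unchanged: every vector operation reports kPrimDouble as its HIR type, so it is treated like an FPU value, while the element type travels separately. A short usage sketch (vec_op stands for any HVecOperation):

  Primitive::Type hir_type = vec_op->GetType();        // always kPrimDouble
  Primitive::Type elem     = vec_op->GetPackedType();  // e.g. kPrimInt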
@@ -223,11 +220,8 @@ class HVecMemoryOperation : public HVecOperation {
DISALLOW_COPY_AND_ASSIGN(HVecMemoryOperation);
};
-// Packed type consistency checker ("same vector length" integral types may mix freely).
+// Packed type consistency checker (same vector length integral types may mix freely).
inline static bool HasConsistentPackedTypes(HInstruction* input, Primitive::Type type) {
- if (input->IsPhi()) {
- return input->GetType() == HVecOperation::kSIMDType; // carries SIMD
- }
DCHECK(input->IsVecOperation());
Primitive::Type input_type = input->AsVecOperation()->GetPackedType();
switch (input_type) {
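The restored checker therefore insists that input is a vector operation and, per the comment, lets integral element types of the same width stand in for one another while floating-point types must match exactly. A self-contained sketch of that rule under those assumptions (simplified enum, not the ART types; the exact grouping lives in the elided switch):

  enum class Packed { kBool, kByte, kChar, kShort, kInt, kLong, kFloat, kDouble };

  static bool IsIntegral(Packed t) {
    return t != Packed::kFloat && t != Packed::kDouble;
  }

  static int Width(Packed t) {
    switch (t) {
      case Packed::kBool: case Packed::kByte:  return 8;
      case Packed::kChar: case Packed::kShort: return 16;
      case Packed::kInt:  case Packed::kFloat: return 32;
      default:                                 return 64;  // kLong, kDouble
    }
  }

  bool ConsistentPackedTypes(Packed a, Packed b) {
    if (IsIntegral(a) && IsIntegral(b)) return Width(a) == Width(b);
    return a == b;  // floating-point must match exactly
  }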
@@ -271,77 +265,27 @@ class HVecReplicateScalar FINAL : public HVecUnaryOperation {
DISALLOW_COPY_AND_ASSIGN(HVecReplicateScalar);
};
-// Extracts a particular scalar from the given vector,
-// viz. extract[ x1, .. , xn ] = x_i.
-//
-// TODO: for now only i == 1 case supported.
-class HVecExtractScalar FINAL : public HVecUnaryOperation {
- public:
- HVecExtractScalar(ArenaAllocator* arena,
- HInstruction* input,
- Primitive::Type packed_type,
- size_t vector_length,
- size_t index,
- uint32_t dex_pc = kNoDexPc)
+// Sum-reduces the given vector into a shorter vector (m < n) or scalar (m = 1),
+// viz. sum-reduce[ x1, .. , xn ] = [ y1, .., ym ], where yi = sum_j x_j.
+class HVecSumReduce FINAL : public HVecUnaryOperation {
+ HVecSumReduce(ArenaAllocator* arena,
+ HInstruction* input,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
: HVecUnaryOperation(arena, input, packed_type, vector_length, dex_pc) {
DCHECK(HasConsistentPackedTypes(input, packed_type));
- DCHECK_LT(index, vector_length);
- DCHECK_EQ(index, 0u);
- }
-
- // Yields a single component in the vector.
- Primitive::Type GetType() const OVERRIDE {
- return GetPackedType();
- }
-
- // An extract needs to stay in place, since SIMD registers are not
- // kept alive across vector loop boundaries (yet).
- bool CanBeMoved() const OVERRIDE { return false; }
-
- DECLARE_INSTRUCTION(VecExtractScalar);
-
- private:
- DISALLOW_COPY_AND_ASSIGN(HVecExtractScalar);
-};
-
-// Reduces the given vector into the first element as sum/min/max,
-// viz. sum-reduce[ x1, .. , xn ] = [ y, ---- ], where y = sum xi
-// and the "-" denotes "don't care" (implementation dependent).
-class HVecReduce FINAL : public HVecUnaryOperation {
- public:
- enum ReductionKind {
- kSum = 1,
- kMin = 2,
- kMax = 3
- };
-
- HVecReduce(ArenaAllocator* arena,
- HInstruction* input,
- Primitive::Type packed_type,
- size_t vector_length,
- ReductionKind kind,
- uint32_t dex_pc = kNoDexPc)
- : HVecUnaryOperation(arena, input, packed_type, vector_length, dex_pc),
- kind_(kind) {
- DCHECK(HasConsistentPackedTypes(input, packed_type));
}
- ReductionKind GetKind() const { return kind_; }
+ // TODO: probably integral promotion
+ Primitive::Type GetType() const OVERRIDE { return GetPackedType(); }
bool CanBeMoved() const OVERRIDE { return true; }
- bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
- DCHECK(other->IsVecReduce());
- const HVecReduce* o = other->AsVecReduce();
- return HVecOperation::InstructionDataEquals(o) && GetKind() == o->GetKind();
- }
-
- DECLARE_INSTRUCTION(VecReduce);
+ DECLARE_INSTRUCTION(VecSumReduce);
private:
- const ReductionKind kind_;
-
- DISALLOW_COPY_AND_ASSIGN(HVecReduce);
+ DISALLOW_COPY_AND_ASSIGN(HVecSumReduce);
};
// Converts every component in the vector,
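A scalar model of the restored HVecSumReduce contract, shown here for the fully reduced m == 1 case as a hedged sketch (the TODO in the hunk suggests the result type may eventually want integral promotion, so the accumulator below is widened):

  #include <cstddef>
  #include <cstdint>

  // sum-reduce[x1, .., xn] = y with y = x1 + .. + xn.
  int64_t SumReduce(const int32_t* x, size_t n) {
    int64_t sum = 0;
    for (size_t i = 0; i < n; ++i) {
      sum += x[i];
    }
    return sum;
  }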
@@ -810,23 +754,20 @@ class HVecUShr FINAL : public HVecBinaryOperation {
//
// Assigns the given scalar elements to a vector,
-// viz. set( array(x1, .., xn) ) = [ x1, .. , xn ] if n == m,
-// set( array(x1, .., xm) ) = [ x1, .. , xm, 0, .., 0 ] if m < n.
+// viz. set( array(x1, .., xn) ) = [ x1, .. , xn ].
class HVecSetScalars FINAL : public HVecOperation {
- public:
HVecSetScalars(ArenaAllocator* arena,
HInstruction** scalars, // array
Primitive::Type packed_type,
size_t vector_length,
- size_t number_of_scalars,
uint32_t dex_pc = kNoDexPc)
: HVecOperation(arena,
packed_type,
SideEffects::None(),
- number_of_scalars,
+ /* number_of_inputs */ vector_length,
vector_length,
dex_pc) {
- for (size_t i = 0; i < number_of_scalars; i++) {
+ for (size_t i = 0; i < vector_length; i++) {
DCHECK(!scalars[i]->IsVecOperation());
SetRawInputAt(0, scalars[i]);
}
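After this hunk HVecSetScalars always takes one scalar per lane (number_of_inputs == vector_length); the partial form padded with zeros is gone. Note that the loop body shown stores every scalar at input slot 0; a per-lane store would use slot i. A scalar model of the intended full-lane semantics (illustrative):

  #include <array>
  #include <cstddef>

  // set(x1, .., xn) = [x1, .., xn]: each lane is filled from its scalar.
  template <typename T, std::size_t N>
  std::array<T, N> SetScalars(const T (&scalars)[N]) {
    std::array<T, N> vec{};
    for (std::size_t i = 0; i < N; ++i) {
      vec[i] = scalars[i];
    }
    return vec;
  }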
diff --git a/compiler/optimizing/nodes_vector_test.cc b/compiler/optimizing/nodes_vector_test.cc
index 5a56a2c210..0238ea4602 100644
--- a/compiler/optimizing/nodes_vector_test.cc
+++ b/compiler/optimizing/nodes_vector_test.cc
@@ -332,32 +332,4 @@ TEST_F(NodesVectorTest, VectorOperationMattersOnMultiplyAccumulate) {
EXPECT_FALSE(v1->Equals(v3)); // different vector lengths
}
-TEST_F(NodesVectorTest, VectorKindMattersOnReduce) {
- HVecOperation* v0 = new (&allocator_)
- HVecReplicateScalar(&allocator_, parameter_, Primitive::kPrimInt, 4);
-
- HVecReduce* v1 = new (&allocator_) HVecReduce(
- &allocator_, v0, Primitive::kPrimInt, 4, HVecReduce::kSum);
- HVecReduce* v2 = new (&allocator_) HVecReduce(
- &allocator_, v0, Primitive::kPrimInt, 4, HVecReduce::kMin);
- HVecReduce* v3 = new (&allocator_) HVecReduce(
- &allocator_, v0, Primitive::kPrimInt, 4, HVecReduce::kMax);
-
- EXPECT_FALSE(v0->CanBeMoved());
- EXPECT_TRUE(v1->CanBeMoved());
- EXPECT_TRUE(v2->CanBeMoved());
- EXPECT_TRUE(v3->CanBeMoved());
-
- EXPECT_EQ(HVecReduce::kSum, v1->GetKind());
- EXPECT_EQ(HVecReduce::kMin, v2->GetKind());
- EXPECT_EQ(HVecReduce::kMax, v3->GetKind());
-
- EXPECT_TRUE(v1->Equals(v1));
- EXPECT_TRUE(v2->Equals(v2));
- EXPECT_TRUE(v3->Equals(v3));
-
- EXPECT_FALSE(v1->Equals(v2)); // different kinds
- EXPECT_FALSE(v1->Equals(v3));
-}
-
} // namespace art
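The deleted test pinned down the equality contract of the removed HVecReduce: two reductions are equal only when the usual operation data and the reduction kind both match, so a kSum and a kMin over the same input never unify. A hedged sketch of that invariant with hypothetical stand-in types:

  // Stand-in for the removed node; only the compared fields matter here.
  struct Reduce {
    int input_id;
    int packed_type;
    int length;
    int kind;  // kSum, kMin, or kMax
  };

  bool DataEquals(const Reduce& a, const Reduce& b) {
    return a.input_id == b.input_id && a.packed_type == b.packed_type &&
           a.length == b.length && a.kind == b.kind;  // kind participates
  }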
diff --git a/compiler/optimizing/scheduler_arm64.cc b/compiler/optimizing/scheduler_arm64.cc
index 1d9d28ab24..510619faf9 100644
--- a/compiler/optimizing/scheduler_arm64.cc
+++ b/compiler/optimizing/scheduler_arm64.cc
@@ -215,12 +215,12 @@ void SchedulingLatencyVisitorARM64::VisitVecReplicateScalar(
last_visited_latency_ = kArm64SIMDReplicateOpLatency;
}
-void SchedulingLatencyVisitorARM64::VisitVecExtractScalar(HVecExtractScalar* instr) {
- HandleSimpleArithmeticSIMD(instr);
+void SchedulingLatencyVisitorARM64::VisitVecSetScalars(HVecSetScalars* instr) {
+ LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId();
}
-void SchedulingLatencyVisitorARM64::VisitVecReduce(HVecReduce* instr) {
- HandleSimpleArithmeticSIMD(instr);
+void SchedulingLatencyVisitorARM64::VisitVecSumReduce(HVecSumReduce* instr) {
+ LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId();
}
void SchedulingLatencyVisitorARM64::VisitVecCnv(HVecCnv* instr ATTRIBUTE_UNUSED) {
@@ -283,8 +283,8 @@ void SchedulingLatencyVisitorARM64::VisitVecAnd(HVecAnd* instr ATTRIBUTE_UNUSED)
last_visited_latency_ = kArm64SIMDIntegerOpLatency;
}
-void SchedulingLatencyVisitorARM64::VisitVecAndNot(HVecAndNot* instr ATTRIBUTE_UNUSED) {
- last_visited_latency_ = kArm64SIMDIntegerOpLatency;
+void SchedulingLatencyVisitorARM64::VisitVecAndNot(HVecAndNot* instr) {
+ LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId();
}
void SchedulingLatencyVisitorARM64::VisitVecOr(HVecOr* instr ATTRIBUTE_UNUSED) {
@@ -307,10 +307,6 @@ void SchedulingLatencyVisitorARM64::VisitVecUShr(HVecUShr* instr) {
HandleSimpleArithmeticSIMD(instr);
}
-void SchedulingLatencyVisitorARM64::VisitVecSetScalars(HVecSetScalars* instr) {
- HandleSimpleArithmeticSIMD(instr);
-}
-
void SchedulingLatencyVisitorARM64::VisitVecMultiplyAccumulate(
HVecMultiplyAccumulate* instr ATTRIBUTE_UNUSED) {
last_visited_latency_ = kArm64SIMDMulIntegerLatency;
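Across these scheduler hunks one convention is restored: a SIMD operation the ARM64 back end can emit records a latency for the scheduling graph, and anything not yet supported fails fast instead. A self-contained sketch of the two cases with stand-in types (the latency value is illustrative):

  #include <cstdio>
  #include <cstdlib>

  struct VecOp { int id; };                          // stand-in for the HIR node
  constexpr unsigned kSIMDIntegerOpLatency = 6;      // illustrative cost
  unsigned last_visited_latency = 0;

  void VisitSupported(const VecOp&) {
    last_visited_latency = kSIMDIntegerOpLatency;    // feed the scheduler a cost
  }

  void VisitUnsupported(const VecOp& op) {
    std::fprintf(stderr, "Unsupported SIMD instruction %d\n", op.id);
    std::abort();                                    // mirrors LOG(FATAL)
  }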
diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h
index e1a80ec6fb..63d5b7d6b6 100644
--- a/compiler/optimizing/scheduler_arm64.h
+++ b/compiler/optimizing/scheduler_arm64.h
@@ -83,8 +83,8 @@ class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor {
M(SuspendCheck , unused) \
M(TypeConversion , unused) \
M(VecReplicateScalar , unused) \
- M(VecExtractScalar , unused) \
- M(VecReduce , unused) \
+ M(VecSetScalars , unused) \
+ M(VecSumReduce , unused) \
M(VecCnv , unused) \
M(VecNeg , unused) \
M(VecAbs , unused) \
@@ -103,7 +103,6 @@ class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor {
M(VecShl , unused) \
M(VecShr , unused) \
M(VecUShr , unused) \
- M(VecSetScalars , unused) \
M(VecMultiplyAccumulate, unused) \
M(VecLoad , unused) \
M(VecStore , unused)