author     2017-02-06 15:35:29 -0800
committer  2017-03-31 10:58:11 -0700
commit     f8f5a16ed7bad1e18179e38453e59c96a944de10 (patch)
tree       53369083a97103563467cc5910a439a1864dd0b1
parent     7298b1ae3e9af5fdb46d168302a26cfbf5d475f5 (diff)
ART vectorizer.
Rationale:
Make SIMD great again with a retargetable and easily extendable vectorizer.
Provides a full x86/x86_64 and a proof-of-concept ARM implementation. Sample
improvement (without any perf tuning yet) for Linpack on x86 is about 20% to 50%.
Test: test-art-host, test-art-target (angler)
Bug: 34083438, 30933338
Change-Id: Ifb77a0f25f690a87cd65bf3d5e9f6be7ea71d6c1
39 files changed, 6523 insertions(+), 59 deletions(-)
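To make the diff below easier to follow, here is an illustrative sketch (not part of the commit) of the transformation the vectorizer performs: a counted loop over a primitive array becomes a SIMD main loop that processes one packed vector of lanes per iteration (the HVecLoad/HVecAdd/HVecStore nodes lowered in the new code_generator_vector_*.cc files), followed by a scalar cleanup loop for the remainder. The function names are hypothetical.

```cpp
#include <cstddef>
#include <cstdint>

// Scalar form, as the loop looks before vectorization.
void add_bytes_scalar(int8_t* a, const int8_t* b, size_t n) {
  for (size_t i = 0; i < n; ++i) {
    a[i] += b[i];
  }
}

// Shape after vectorization with 8 lanes (cf. the 8u vector length the ARM64
// back end asserts for byte operations): a main loop consuming one packed
// vector per iteration, then a scalar loop for the leftover elements. The
// inner lane loop stands in for a single SIMD instruction (e.g. ADD on V8B).
void add_bytes_vectorized(int8_t* a, const int8_t* b, size_t n) {
  constexpr size_t kLanes = 8;
  size_t i = 0;
  for (; i + kLanes <= n; i += kLanes) {  // one HVecLoad/HVecAdd/HVecStore
    for (size_t lane = 0; lane < kLanes; ++lane) {
      a[i + lane] += b[i + lane];
    }
  }
  for (; i < n; ++i) {  // scalar cleanup for the remainder
    a[i] += b[i];
  }
}
```

The retargetable design is visible in the file list: each back end gets its own code_generator_vector_*.cc that lowers the same HVec nodes, while targets without an implementation yet (ARM, MIPS, MIPS64) abort via LOG(FATAL).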
diff --git a/compiler/Android.bp b/compiler/Android.bp index b444fffd56..312fc7b35a 100644 --- a/compiler/Android.bp +++ b/compiler/Android.bp @@ -106,7 +106,9 @@ art_cc_defaults { "linker/arm/relative_patcher_arm_base.cc", "linker/arm/relative_patcher_thumb2.cc", "optimizing/code_generator_arm.cc", + "optimizing/code_generator_vector_arm.cc", "optimizing/code_generator_arm_vixl.cc", + "optimizing/code_generator_vector_arm_vixl.cc", "optimizing/dex_cache_array_fixups_arm.cc", "optimizing/instruction_simplifier_arm.cc", "optimizing/instruction_simplifier_shared.cc", @@ -126,6 +128,7 @@ art_cc_defaults { "jni/quick/arm64/calling_convention_arm64.cc", "linker/arm64/relative_patcher_arm64.cc", "optimizing/code_generator_arm64.cc", + "optimizing/code_generator_vector_arm64.cc", "optimizing/scheduler_arm64.cc", "optimizing/instruction_simplifier_arm64.cc", "optimizing/intrinsics_arm64.cc", @@ -139,6 +142,7 @@ art_cc_defaults { "jni/quick/mips/calling_convention_mips.cc", "linker/mips/relative_patcher_mips.cc", "optimizing/code_generator_mips.cc", + "optimizing/code_generator_vector_mips.cc", "optimizing/dex_cache_array_fixups_mips.cc", "optimizing/intrinsics_mips.cc", "optimizing/pc_relative_fixups_mips.cc", @@ -151,6 +155,7 @@ art_cc_defaults { "jni/quick/mips64/calling_convention_mips64.cc", "linker/mips64/relative_patcher_mips64.cc", "optimizing/code_generator_mips64.cc", + "optimizing/code_generator_vector_mips64.cc", "optimizing/intrinsics_mips64.cc", "utils/mips64/assembler_mips64.cc", "utils/mips64/managed_register_mips64.cc", @@ -162,6 +167,7 @@ art_cc_defaults { "linker/x86/relative_patcher_x86.cc", "linker/x86/relative_patcher_x86_base.cc", "optimizing/code_generator_x86.cc", + "optimizing/code_generator_vector_x86.cc", "optimizing/intrinsics_x86.cc", "optimizing/pc_relative_fixups_x86.cc", "optimizing/x86_memory_gen.cc", @@ -176,6 +182,7 @@ art_cc_defaults { "linker/x86_64/relative_patcher_x86_64.cc", "optimizing/intrinsics_x86_64.cc", "optimizing/code_generator_x86_64.cc", + "optimizing/code_generator_vector_x86_64.cc", "utils/x86_64/assembler_x86_64.cc", "utils/x86_64/jni_macro_assembler_x86_64.cc", "utils/x86_64/managed_register_x86_64.cc", diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 7471cd5f12..10d8b841f8 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -318,6 +318,11 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { void GenerateDivRemIntegral(HBinaryOperation* instruction); void HandleGoto(HInstruction* got, HBasicBlock* successor); + vixl::aarch64::MemOperand CreateVecMemRegisters( + HVecMemoryOperation* instruction, + Location* reg_loc, + bool is_load); + Arm64Assembler* const assembler_; CodeGeneratorARM64* const codegen_; diff --git a/compiler/optimizing/code_generator_vector_arm.cc b/compiler/optimizing/code_generator_vector_arm.cc new file mode 100644 index 0000000000..ba2b2cb2c9 --- /dev/null +++ b/compiler/optimizing/code_generator_vector_arm.cc @@ -0,0 +1,235 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "code_generator_arm.h" + +namespace art { +namespace arm { + +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ down_cast<ArmAssembler*>(GetAssembler())-> // NOLINT + +void LocationsBuilderARM::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorARM::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARM::VisitVecSetScalars(HVecSetScalars* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorARM::VisitVecSetScalars(HVecSetScalars* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARM::VisitVecSumReduce(HVecSumReduce* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorARM::VisitVecSumReduce(HVecSumReduce* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +// Helper to set up locations for vector unary operations. +static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + DCHECK(locations); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARM::VisitVecCnv(HVecCnv* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM::VisitVecCnv(HVecCnv* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARM::VisitVecNeg(HVecNeg* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM::VisitVecNeg(HVecNeg* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARM::VisitVecNot(HVecNot* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM::VisitVecNot(HVecNot* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +// Helper to set up locations for vector binary operations. 
+static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + DCHECK(locations); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARM::VisitVecAdd(HVecAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM::VisitVecAdd(HVecAdd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARM::VisitVecSub(HVecSub* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM::VisitVecSub(HVecSub* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARM::VisitVecMul(HVecMul* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM::VisitVecMul(HVecMul* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARM::VisitVecDiv(HVecDiv* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM::VisitVecDiv(HVecDiv* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARM::VisitVecAnd(HVecAnd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM::VisitVecAnd(HVecAnd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARM::VisitVecAndNot(HVecAndNot* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM::VisitVecAndNot(HVecAndNot* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARM::VisitVecOr(HVecOr* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM::VisitVecOr(HVecOr* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARM::VisitVecXor(HVecXor* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM::VisitVecXor(HVecXor* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +// Helper to set up locations for vector shift operations. 
+static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + DCHECK(locations); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARM::VisitVecShl(HVecShl* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM::VisitVecShl(HVecShl* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARM::VisitVecShr(HVecShr* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM::VisitVecShr(HVecShr* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARM::VisitVecUShr(HVecUShr* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM::VisitVecUShr(HVecUShr* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARM::VisitVecLoad(HVecLoad* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorARM::VisitVecLoad(HVecLoad* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARM::VisitVecStore(HVecStore* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorARM::VisitVecStore(HVecStore* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +#undef __ + +} // namespace arm +} // namespace art diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc new file mode 100644 index 0000000000..96d00210b8 --- /dev/null +++ b/compiler/optimizing/code_generator_vector_arm64.cc @@ -0,0 +1,641 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "code_generator_arm64.h" +#include "mirror/array-inl.h" + +using namespace vixl::aarch64; // NOLINT(build/namespaces) + +namespace art { +namespace arm64 { + +using helpers::DRegisterFrom; +using helpers::HeapOperand; +using helpers::InputRegisterAt; +using helpers::Int64ConstantFrom; +using helpers::XRegisterFrom; + +#define __ GetVIXLAssembler()-> + +void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + case Primitive::kPrimFloat: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { + LocationSummary* locations = instruction->GetLocations(); + FPRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Dup(dst.V8B(), InputRegisterAt(instruction, 0)); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Dup(dst.V4H(), InputRegisterAt(instruction, 0)); + break; + case Primitive::kPrimInt: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Dup(dst.V2S(), InputRegisterAt(instruction, 0)); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Dup(dst.V2S(), DRegisterFrom(locations->InAt(0)).V2S(), 0); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARM64::VisitVecSumReduce(HVecSumReduce* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorARM64::VisitVecSumReduce(HVecSumReduce* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +// Helper to set up locations for vector unary operations. +static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), + instruction->IsVecNot() ? 
Location::kOutputOverlap + : Location::kNoOutputOverlap); + break; + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecCnv(HVecCnv* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecCnv(HVecCnv* instruction) { + LocationSummary* locations = instruction->GetLocations(); + FPRegister src = DRegisterFrom(locations->InAt(0)); + FPRegister dst = DRegisterFrom(locations->Out()); + Primitive::Type from = instruction->GetInputType(); + Primitive::Type to = instruction->GetResultType(); + if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) { + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Scvtf(dst.V2S(), src.V2S()); + } else { + LOG(FATAL) << "Unsupported SIMD type"; + } +} + +void LocationsBuilderARM64::VisitVecNeg(HVecNeg* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecNeg(HVecNeg* instruction) { + LocationSummary* locations = instruction->GetLocations(); + FPRegister src = DRegisterFrom(locations->InAt(0)); + FPRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Neg(dst.V8B(), src.V8B()); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Neg(dst.V4H(), src.V4H()); + break; + case Primitive::kPrimInt: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Neg(dst.V2S(), src.V2S()); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Fneg(dst.V2S(), src.V2S()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecNot(HVecNot* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecNot(HVecNot* instruction) { + LocationSummary* locations = instruction->GetLocations(); + FPRegister src = DRegisterFrom(locations->InAt(0)); + FPRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: // special case boolean-not + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Movi(dst.V8B(), 1); + __ Eor(dst.V8B(), dst.V8B(), src.V8B()); + break; + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + __ Not(dst.V8B(), src.V8B()); // lanes do not matter + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +// Helper to set up locations for vector binary operations. 
+static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecAdd(HVecAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecAdd(HVecAdd* instruction) { + LocationSummary* locations = instruction->GetLocations(); + FPRegister lhs = DRegisterFrom(locations->InAt(0)); + FPRegister rhs = DRegisterFrom(locations->InAt(1)); + FPRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Add(dst.V8B(), lhs.V8B(), rhs.V8B()); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Add(dst.V4H(), lhs.V4H(), rhs.V4H()); + break; + case Primitive::kPrimInt: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Add(dst.V2S(), lhs.V2S(), rhs.V2S()); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Fadd(dst.V2S(), lhs.V2S(), rhs.V2S()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecSub(HVecSub* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecSub(HVecSub* instruction) { + LocationSummary* locations = instruction->GetLocations(); + FPRegister lhs = DRegisterFrom(locations->InAt(0)); + FPRegister rhs = DRegisterFrom(locations->InAt(1)); + FPRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Sub(dst.V8B(), lhs.V8B(), rhs.V8B()); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Sub(dst.V4H(), lhs.V4H(), rhs.V4H()); + break; + case Primitive::kPrimInt: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Sub(dst.V2S(), lhs.V2S(), rhs.V2S()); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Fsub(dst.V2S(), lhs.V2S(), rhs.V2S()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecMul(HVecMul* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecMul(HVecMul* instruction) { + LocationSummary* locations = instruction->GetLocations(); + FPRegister lhs = DRegisterFrom(locations->InAt(0)); + FPRegister rhs = DRegisterFrom(locations->InAt(1)); + FPRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Mul(dst.V8B(), lhs.V8B(), rhs.V8B()); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(4u, 
instruction->GetVectorLength()); + __ Mul(dst.V4H(), lhs.V4H(), rhs.V4H()); + break; + case Primitive::kPrimInt: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Mul(dst.V2S(), lhs.V2S(), rhs.V2S()); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Fmul(dst.V2S(), lhs.V2S(), rhs.V2S()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecDiv(HVecDiv* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecDiv(HVecDiv* instruction) { + LocationSummary* locations = instruction->GetLocations(); + FPRegister lhs = DRegisterFrom(locations->InAt(0)); + FPRegister rhs = DRegisterFrom(locations->InAt(1)); + FPRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimFloat: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Fdiv(dst.V2S(), lhs.V2S(), rhs.V2S()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecAnd(HVecAnd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecAnd(HVecAnd* instruction) { + LocationSummary* locations = instruction->GetLocations(); + FPRegister lhs = DRegisterFrom(locations->InAt(0)); + FPRegister rhs = DRegisterFrom(locations->InAt(1)); + FPRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + __ And(dst.V8B(), lhs.V8B(), rhs.V8B()); // lanes do not matter + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecAndNot(HVecAndNot* instruction) { + LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId(); +} + +void InstructionCodeGeneratorARM64::VisitVecAndNot(HVecAndNot* instruction) { + LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId(); +} + +void LocationsBuilderARM64::VisitVecOr(HVecOr* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecOr(HVecOr* instruction) { + LocationSummary* locations = instruction->GetLocations(); + FPRegister lhs = DRegisterFrom(locations->InAt(0)); + FPRegister rhs = DRegisterFrom(locations->InAt(1)); + FPRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + __ Orr(dst.V8B(), lhs.V8B(), rhs.V8B()); // lanes do not matter + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecXor(HVecXor* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecXor(HVecXor* instruction) { + LocationSummary* locations = instruction->GetLocations(); + FPRegister lhs = DRegisterFrom(locations->InAt(0)); + FPRegister rhs = DRegisterFrom(locations->InAt(1)); + FPRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case 
Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + __ Eor(dst.V8B(), lhs.V8B(), rhs.V8B()); // lanes do not matter + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +// Helper to set up locations for vector shift operations. +static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecShl(HVecShl* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecShl(HVecShl* instruction) { + LocationSummary* locations = instruction->GetLocations(); + FPRegister lhs = DRegisterFrom(locations->InAt(0)); + FPRegister dst = DRegisterFrom(locations->Out()); + int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Shl(dst.V8B(), lhs.V8B(), value); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Shl(dst.V4H(), lhs.V4H(), value); + break; + case Primitive::kPrimInt: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Shl(dst.V2S(), lhs.V2S(), value); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecShr(HVecShr* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecShr(HVecShr* instruction) { + LocationSummary* locations = instruction->GetLocations(); + FPRegister lhs = DRegisterFrom(locations->InAt(0)); + FPRegister dst = DRegisterFrom(locations->Out()); + int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Sshr(dst.V8B(), lhs.V8B(), value); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Sshr(dst.V4H(), lhs.V4H(), value); + break; + case Primitive::kPrimInt: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Sshr(dst.V2S(), lhs.V2S(), value); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecUShr(HVecUShr* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecUShr(HVecUShr* instruction) { + LocationSummary* locations = instruction->GetLocations(); + FPRegister lhs = DRegisterFrom(locations->InAt(0)); + FPRegister dst = DRegisterFrom(locations->Out()); + int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Ushr(dst.V8B(), lhs.V8B(), value); + 
break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Ushr(dst.V4H(), lhs.V4H(), value); + break; + case Primitive::kPrimInt: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Ushr(dst.V2S(), lhs.V2S(), value); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +// Helper to set up locations for vector memory operations. +static void CreateVecMemLocations(ArenaAllocator* arena, + HVecMemoryOperation* instruction, + bool is_load) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + if (is_load) { + locations->SetOut(Location::RequiresFpuRegister()); + } else { + locations->SetInAt(2, Location::RequiresFpuRegister()); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +// Helper to set up registers and address for vector memory operations. +MemOperand InstructionCodeGeneratorARM64::CreateVecMemRegisters( + HVecMemoryOperation* instruction, + Location* reg_loc, + bool is_load) { + LocationSummary* locations = instruction->GetLocations(); + Register base = InputRegisterAt(instruction, 0); + Location index = locations->InAt(1); + *reg_loc = is_load ? locations->Out() : locations->InAt(2); + + Primitive::Type packed_type = instruction->GetPackedType(); + uint32_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(packed_type)).Uint32Value(); + size_t shift = Primitive::ComponentSizeShift(packed_type); + + UseScratchRegisterScope temps(GetVIXLAssembler()); + Register temp = temps.AcquireSameSizeAs(base); + if (index.IsConstant()) { + offset += Int64ConstantFrom(index) << shift; + __ Add(temp, base, offset); + } else { + if (instruction->InputAt(0)->IsIntermediateAddress()) { + temp = base; + } else { + __ Add(temp, base, offset); + } + __ Add(temp.X(), temp.X(), Operand(XRegisterFrom(index), LSL, shift)); + } + return HeapOperand(temp); +} + +void LocationsBuilderARM64::VisitVecLoad(HVecLoad* instruction) { + CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true); +} + +void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) { + Location reg_loc = Location::NoLocation(); + MemOperand mem = CreateVecMemRegisters(instruction, ®_loc, /*is_load*/ true); + FPRegister reg = DRegisterFrom(reg_loc); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Ld1(reg.V8B(), mem); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Ld1(reg.V4H(), mem); + break; + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Ld1(reg.V2S(), mem); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecStore(HVecStore* instruction) { + CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ false); +} + +void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) { + Location reg_loc = Location::NoLocation(); + MemOperand mem = 
CreateVecMemRegisters(instruction, ®_loc, /*is_load*/ false); + FPRegister reg = DRegisterFrom(reg_loc); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ St1(reg.V8B(), mem); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ St1(reg.V4H(), mem); + break; + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ St1(reg.V2S(), mem); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +#undef __ + +} // namespace arm64 +} // namespace art diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc new file mode 100644 index 0000000000..171198902d --- /dev/null +++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc @@ -0,0 +1,235 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "code_generator_arm_vixl.h" + +namespace art { +namespace arm { + +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ reinterpret_cast<ArmVIXLAssembler*>(GetAssembler())->GetVIXLAssembler()-> // NOLINT + +void LocationsBuilderARMVIXL::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARMVIXL::VisitVecSumReduce(HVecSumReduce* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecSumReduce(HVecSumReduce* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +// Helper to set up locations for vector unary operations. 
+static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + DCHECK(locations); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARMVIXL::VisitVecCnv(HVecCnv* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecCnv(HVecCnv* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARMVIXL::VisitVecNeg(HVecNeg* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecNeg(HVecNeg* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARMVIXL::VisitVecNot(HVecNot* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecNot(HVecNot* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +// Helper to set up locations for vector binary operations. +static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + DCHECK(locations); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARMVIXL::VisitVecAdd(HVecAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecAdd(HVecAdd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARMVIXL::VisitVecSub(HVecSub* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecSub(HVecSub* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARMVIXL::VisitVecMul(HVecMul* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecMul(HVecMul* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARMVIXL::VisitVecDiv(HVecDiv* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecDiv(HVecDiv* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARMVIXL::VisitVecAnd(HVecAnd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecAnd(HVecAnd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARMVIXL::VisitVecAndNot(HVecAndNot* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecAndNot(HVecAndNot* instruction) { + LOG(FATAL) << "No SIMD for " << 
instruction->GetId(); +} + +void LocationsBuilderARMVIXL::VisitVecOr(HVecOr* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecOr(HVecOr* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARMVIXL::VisitVecXor(HVecXor* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecXor(HVecXor* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +// Helper to set up locations for vector shift operations. +static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + DCHECK(locations); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARMVIXL::VisitVecShl(HVecShl* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecShl(HVecShl* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARMVIXL::VisitVecShr(HVecShr* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecShr(HVecShr* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARMVIXL::VisitVecUShr(HVecUShr* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecUShr(HVecUShr* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARMVIXL::VisitVecLoad(HVecLoad* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecLoad(HVecLoad* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARMVIXL::VisitVecStore(HVecStore* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecStore(HVecStore* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +#undef __ + +} // namespace arm +} // namespace art diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc new file mode 100644 index 0000000000..6f5fe0d2a4 --- /dev/null +++ b/compiler/optimizing/code_generator_vector_mips.cc @@ -0,0 +1,235 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "code_generator_mips.h" + +namespace art { +namespace mips { + +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ down_cast<MipsAssembler*>(GetAssembler())-> // NOLINT + +void LocationsBuilderMIPS::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorMIPS::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS::VisitVecSetScalars(HVecSetScalars* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorMIPS::VisitVecSetScalars(HVecSetScalars* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS::VisitVecSumReduce(HVecSumReduce* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorMIPS::VisitVecSumReduce(HVecSumReduce* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +// Helper to set up locations for vector unary operations. +static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + DCHECK(locations); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderMIPS::VisitVecCnv(HVecCnv* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecCnv(HVecCnv* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS::VisitVecNeg(HVecNeg* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecNeg(HVecNeg* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS::VisitVecNot(HVecNot* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecNot(HVecNot* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +// Helper to set up locations for vector binary operations. 
+static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + DCHECK(locations); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderMIPS::VisitVecAdd(HVecAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecAdd(HVecAdd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS::VisitVecSub(HVecSub* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecSub(HVecSub* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS::VisitVecMul(HVecMul* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecMul(HVecMul* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS::VisitVecDiv(HVecDiv* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecDiv(HVecDiv* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS::VisitVecAnd(HVecAnd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecAnd(HVecAnd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS::VisitVecAndNot(HVecAndNot* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecAndNot(HVecAndNot* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS::VisitVecOr(HVecOr* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecOr(HVecOr* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS::VisitVecXor(HVecXor* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecXor(HVecXor* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +// Helper to set up locations for vector shift operations. 
+static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + DCHECK(locations); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderMIPS::VisitVecShl(HVecShl* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecShl(HVecShl* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS::VisitVecShr(HVecShr* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecShr(HVecShr* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS::VisitVecUShr(HVecUShr* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecUShr(HVecUShr* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS::VisitVecLoad(HVecLoad* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorMIPS::VisitVecLoad(HVecLoad* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS::VisitVecStore(HVecStore* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorMIPS::VisitVecStore(HVecStore* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +#undef __ + +} // namespace mips +} // namespace art diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc new file mode 100644 index 0000000000..2ee7ac91cf --- /dev/null +++ b/compiler/optimizing/code_generator_vector_mips64.cc @@ -0,0 +1,235 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "code_generator_mips64.h" + +namespace art { +namespace mips64 { + +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. 
+#define __ down_cast<Mips64Assembler*>(GetAssembler())-> // NOLINT + +void LocationsBuilderMIPS64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorMIPS64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS64::VisitVecSumReduce(HVecSumReduce* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorMIPS64::VisitVecSumReduce(HVecSumReduce* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +// Helper to set up locations for vector unary operations. +static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + DCHECK(locations); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderMIPS64::VisitVecCnv(HVecCnv* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecCnv(HVecCnv* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS64::VisitVecNeg(HVecNeg* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecNeg(HVecNeg* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS64::VisitVecNot(HVecNot* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecNot(HVecNot* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +// Helper to set up locations for vector binary operations. 
+static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + DCHECK(locations); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderMIPS64::VisitVecAdd(HVecAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecAdd(HVecAdd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS64::VisitVecSub(HVecSub* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecSub(HVecSub* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS64::VisitVecMul(HVecMul* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecMul(HVecMul* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS64::VisitVecDiv(HVecDiv* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecDiv(HVecDiv* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS64::VisitVecAnd(HVecAnd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecAnd(HVecAnd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS64::VisitVecAndNot(HVecAndNot* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecAndNot(HVecAndNot* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS64::VisitVecOr(HVecOr* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecOr(HVecOr* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS64::VisitVecXor(HVecXor* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecXor(HVecXor* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +// Helper to set up locations for vector shift operations. 
+static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + DCHECK(locations); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderMIPS64::VisitVecShl(HVecShl* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecShl(HVecShl* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS64::VisitVecShr(HVecShr* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecShr(HVecShr* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS64::VisitVecUShr(HVecUShr* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecUShr(HVecUShr* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS64::VisitVecLoad(HVecLoad* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorMIPS64::VisitVecLoad(HVecLoad* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS64::VisitVecStore(HVecStore* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorMIPS64::VisitVecStore(HVecStore* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +#undef __ + +} // namespace mips64 +} // namespace art diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc new file mode 100644 index 0000000000..4f3988ee2e --- /dev/null +++ b/compiler/optimizing/code_generator_vector_x86.cc @@ -0,0 +1,767 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "code_generator_x86.h" +#include "mirror/array-inl.h" + +namespace art { +namespace x86 { + +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ down_cast<X86Assembler*>(GetAssembler())-> // NOLINT + +void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimLong: + // Long needs extra temporary to load the register pair. 
+ locations->AddTemp(Location::RequiresFpuRegister()); + FALLTHROUGH_INTENDED; + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ movd(reg, locations->InAt(0).AsRegister<Register>()); + __ punpcklbw(reg, reg); + __ punpcklwd(reg, reg); + __ pshufd(reg, reg, Immediate(0)); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ movd(reg, locations->InAt(0).AsRegister<Register>()); + __ punpcklwd(reg, reg); + __ pshufd(reg, reg, Immediate(0)); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ movd(reg, locations->InAt(0).AsRegister<Register>()); + __ pshufd(reg, reg, Immediate(0)); + break; + case Primitive::kPrimLong: { + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ movd(reg, locations->InAt(0).AsRegisterPairLow<Register>()); + __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>()); + __ punpckldq(reg, tmp); + __ punpcklqdq(reg, reg); + break; + } + case Primitive::kPrimFloat: + DCHECK(locations->InAt(0).Equals(locations->Out())); + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ shufps(reg, reg, Immediate(0)); + break; + case Primitive::kPrimDouble: + DCHECK(locations->InAt(0).Equals(locations->Out())); + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ shufpd(reg, reg, Immediate(0)); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderX86::VisitVecSumReduce(HVecSumReduce* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorX86::VisitVecSumReduce(HVecSumReduce* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +// Helper to set up locations for vector unary operations. 
+static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecCnv(HVecCnv* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecCnv(HVecCnv* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + Primitive::Type from = instruction->GetInputType(); + Primitive::Type to = instruction->GetResultType(); + if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) { + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ cvtdq2ps(dst, src); + } else { + LOG(FATAL) << "Unsupported SIMD type"; + } +} + +void LocationsBuilderX86::VisitVecNeg(HVecNeg* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecNeg(HVecNeg* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ pxor(dst, dst); + __ psubb(dst, src); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ pxor(dst, dst); + __ psubw(dst, src); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ pxor(dst, dst); + __ psubd(dst, src); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ pxor(dst, dst); + __ psubq(dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ xorps(dst, dst); + __ subps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ xorpd(dst, dst); + __ subpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecNot(HVecNot* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); + // Boolean-not requires a temporary to construct the 16 x one. 
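+ // (Illustration: pxor dst,dst zeroes dst, pcmpeqb tmp,tmp sets every byte of tmp to 0xff, + // psubb dst,tmp then leaves 1 in every byte, and the final pxor dst,src computes + // dst[i] = src[i] ^ 1, i.e. logical negation of booleans canonicalized to 0/1.)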
+ if (instruction->GetPackedType() == Primitive::kPrimBoolean) { + instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); + } +} + +void InstructionCodeGeneratorX86::VisitVecNot(HVecNot* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: { // special case boolean-not + DCHECK_EQ(16u, instruction->GetVectorLength()); + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + __ pxor(dst, dst); + __ pcmpeqb(tmp, tmp); // all ones + __ psubb(dst, tmp); // 16 x one + __ pxor(dst, src); + break; + } + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + __ pcmpeqb(dst, dst); // all ones + __ pxor(dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ pcmpeqb(dst, dst); // all ones + __ xorps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ pcmpeqb(dst, dst); // all ones + __ xorpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +// Helper to set up locations for vector binary operations. +static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecAdd(HVecAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecAdd(HVecAdd* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ paddb(dst, src); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ paddw(dst, src); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ paddd(dst, src); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ paddq(dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ addps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ addpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecSub(HVecSub* instruction) { + 
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecSub(HVecSub* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ psubb(dst, src); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ psubw(dst, src); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ psubd(dst, src); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ psubq(dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ subps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ subpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecMul(HVecMul* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecMul(HVecMul* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ pmullw(dst, src); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ pmulld(dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ mulps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ mulpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecDiv(HVecDiv* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecDiv(HVecDiv* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ divps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ divpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecAnd(HVecAnd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecAnd(HVecAnd* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case 
Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + __ pand(dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ andps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ andpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecAndNot(HVecAndNot* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecAndNot(HVecAndNot* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + __ pandn(dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ andnps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ andnpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecOr(HVecOr* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecOr(HVecOr* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + __ por(dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ orps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ orpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecXor(HVecXor* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecXor(HVecXor* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + DCHECK_LE(2u, instruction->GetVectorLength()); + 
DCHECK_LE(instruction->GetVectorLength(), 16u); + __ pxor(dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ xorps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ xorpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +// Helper to set up locations for vector shift operations. +static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecShl(HVecShl* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecShl(HVecShl* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ psllw(dst, Immediate(static_cast<uint8_t>(value))); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ pslld(dst, Immediate(static_cast<uint8_t>(value))); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ psllq(dst, Immediate(static_cast<uint8_t>(value))); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecShr(HVecShr* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecShr(HVecShr* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ psraw(dst, Immediate(static_cast<uint8_t>(value))); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ psrad(dst, Immediate(static_cast<uint8_t>(value))); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecUShr(HVecUShr* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecUShr(HVecUShr* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimChar: + case 
Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ psrlw(dst, Immediate(static_cast<uint8_t>(value))); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ psrld(dst, Immediate(static_cast<uint8_t>(value))); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ psrlq(dst, Immediate(static_cast<uint8_t>(value))); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +// Helper to set up locations for vector memory operations. +static void CreateVecMemLocations(ArenaAllocator* arena, + HVecMemoryOperation* instruction, + bool is_load) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + if (is_load) { + locations->SetOut(Location::RequiresFpuRegister()); + } else { + locations->SetInAt(2, Location::RequiresFpuRegister()); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +// Helper to set up registers and address for vector memory operations. +static Address CreateVecMemRegisters(HVecMemoryOperation* instruction, + Location* reg_loc, + bool is_load) { + LocationSummary* locations = instruction->GetLocations(); + Location base = locations->InAt(0); + Location index = locations->InAt(1); + *reg_loc = is_load ? locations->Out() : locations->InAt(2); + size_t size = Primitive::ComponentSize(instruction->GetPackedType()); + uint32_t offset = mirror::Array::DataOffset(size).Uint32Value(); + ScaleFactor scale = TIMES_1; + switch (size) { + case 2: scale = TIMES_2; break; + case 4: scale = TIMES_4; break; + case 8: scale = TIMES_8; break; + default: break; + } + return CodeGeneratorX86::ArrayAddress(base.AsRegister<Register>(), index, scale, offset); +} + +void LocationsBuilderX86::VisitVecLoad(HVecLoad* instruction) { + CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true); +} + +void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) { + Location reg_loc = Location::NoLocation(); + Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ true); + XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>(); + bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + is_aligned16 ? __ movaps(reg, address) : __ movups(reg, address); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + is_aligned16 ?
__ movapd(reg, address) : __ movupd(reg, address); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecStore(HVecStore* instruction) { + CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ false); +} + +void InstructionCodeGeneratorX86::VisitVecStore(HVecStore* instruction) { + Location reg_loc = Location::NoLocation(); + Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ false); + XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>(); + bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + is_aligned16 ? __ movdqa(address, reg) : __ movdqu(address, reg); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + is_aligned16 ? __ movaps(address, reg) : __ movups(address, reg); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +#undef __ + +} // namespace x86 +} // namespace art diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc new file mode 100644 index 0000000000..b1c1494f6b --- /dev/null +++ b/compiler/optimizing/code_generator_vector_x86_64.cc @@ -0,0 +1,760 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "code_generator_x86_64.h" +#include "mirror/array-inl.h" + +namespace art { +namespace x86_64 { + +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
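+// (As in the x86 file, __ is the usual ART assembler shorthand: __ movd(reg, src) expands +// to down_cast<X86_64Assembler*>(GetAssembler())->movd(reg, src).)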
+#define __ down_cast<X86_64Assembler*>(GetAssembler())-> // NOLINT + +void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>()); + __ punpcklbw(reg, reg); + __ punpcklwd(reg, reg); + __ pshufd(reg, reg, Immediate(0)); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>()); + __ punpcklwd(reg, reg); + __ pshufd(reg, reg, Immediate(0)); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>()); + __ pshufd(reg, reg, Immediate(0)); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>()); // is 64-bit + __ punpcklqdq(reg, reg); + break; + case Primitive::kPrimFloat: + DCHECK(locations->InAt(0).Equals(locations->Out())); + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ shufps(reg, reg, Immediate(0)); + break; + case Primitive::kPrimDouble: + DCHECK(locations->InAt(0).Equals(locations->Out())); + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ shufpd(reg, reg, Immediate(0)); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderX86_64::VisitVecSumReduce(HVecSumReduce* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorX86_64::VisitVecSumReduce(HVecSumReduce* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +// Helper to set up locations for vector unary operations. 
+static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecCnv(HVecCnv* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecCnv(HVecCnv* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + Primitive::Type from = instruction->GetInputType(); + Primitive::Type to = instruction->GetResultType(); + if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) { + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ cvtdq2ps(dst, src); + } else { + LOG(FATAL) << "Unsupported SIMD type"; + } +} + +void LocationsBuilderX86_64::VisitVecNeg(HVecNeg* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecNeg(HVecNeg* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ pxor(dst, dst); + __ psubb(dst, src); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ pxor(dst, dst); + __ psubw(dst, src); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ pxor(dst, dst); + __ psubd(dst, src); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ pxor(dst, dst); + __ psubq(dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ xorps(dst, dst); + __ subps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ xorpd(dst, dst); + __ subpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecNot(HVecNot* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); + // Boolean-not requires a temporary to construct the 16 x one. 
+ if (instruction->GetPackedType() == Primitive::kPrimBoolean) { + instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); + } +} + +void InstructionCodeGeneratorX86_64::VisitVecNot(HVecNot* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: { // special case boolean-not + DCHECK_EQ(16u, instruction->GetVectorLength()); + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + __ pxor(dst, dst); + __ pcmpeqb(tmp, tmp); // all ones + __ psubb(dst, tmp); // 16 x one + __ pxor(dst, src); + break; + } + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + __ pcmpeqb(dst, dst); // all ones + __ pxor(dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ pcmpeqb(dst, dst); // all ones + __ xorps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ pcmpeqb(dst, dst); // all ones + __ xorpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +// Helper to set up locations for vector binary operations. +static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecAdd(HVecAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecAdd(HVecAdd* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ paddb(dst, src); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ paddw(dst, src); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ paddd(dst, src); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ paddq(dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ addps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ addpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecSub(HVecSub* instruction) { + 
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecSub(HVecSub* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ psubb(dst, src); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ psubw(dst, src); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ psubd(dst, src); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ psubq(dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ subps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ subpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecMul(HVecMul* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecMul(HVecMul* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ pmullw(dst, src); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ pmulld(dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ mulps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ mulpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecDiv(HVecDiv* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecDiv(HVecDiv* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ divps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ divpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecAnd(HVecAnd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecAnd(HVecAnd* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + 
case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + __ pand(dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ andps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ andpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecAndNot(HVecAndNot* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecAndNot(HVecAndNot* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + __ pandn(dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ andnps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ andnpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecOr(HVecOr* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecOr(HVecOr* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + __ por(dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ orps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ orpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecXor(HVecXor* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecXor(HVecXor* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + DCHECK_LE(2u, instruction->GetVectorLength()); + 
DCHECK_LE(instruction->GetVectorLength(), 16u); + __ pxor(dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ xorps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ xorpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +// Helper to set up locations for vector shift operations. +static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecShl(HVecShl* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecShl(HVecShl* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ psllw(dst, Immediate(static_cast<int8_t>(value))); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ pslld(dst, Immediate(static_cast<int8_t>(value))); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ psllq(dst, Immediate(static_cast<int8_t>(value))); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecShr(HVecShr* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecShr(HVecShr* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ psraw(dst, Immediate(static_cast<int8_t>(value))); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ psrad(dst, Immediate(static_cast<int8_t>(value))); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecUShr(HVecUShr* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecUShr(HVecUShr* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case 
Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ psrlw(dst, Immediate(static_cast<int8_t>(value))); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ psrld(dst, Immediate(static_cast<int8_t>(value))); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ psrlq(dst, Immediate(static_cast<int8_t>(value))); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +// Helper to set up locations for vector memory operations. +static void CreateVecMemLocations(ArenaAllocator* arena, + HVecMemoryOperation* instruction, + bool is_load) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + if (is_load) { + locations->SetOut(Location::RequiresFpuRegister()); + } else { + locations->SetInAt(2, Location::RequiresFpuRegister()); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +// Helper to set up registers and address for vector memory operations. +static Address CreateVecMemRegisters(HVecMemoryOperation* instruction, + Location* reg_loc, + bool is_load) { + LocationSummary* locations = instruction->GetLocations(); + Location base = locations->InAt(0); + Location index = locations->InAt(1); + *reg_loc = is_load ? locations->Out() : locations->InAt(2); + size_t size = Primitive::ComponentSize(instruction->GetPackedType()); + uint32_t offset = mirror::Array::DataOffset(size).Uint32Value(); + ScaleFactor scale = TIMES_1; + switch (size) { + case 2: scale = TIMES_2; break; + case 4: scale = TIMES_4; break; + case 8: scale = TIMES_8; break; + default: break; + } + return CodeGeneratorX86_64::ArrayAddress(base.AsRegister<CpuRegister>(), index, scale, offset); +} + +void LocationsBuilderX86_64::VisitVecLoad(HVecLoad* instruction) { + CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true); +} + +void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) { + Location reg_loc = Location::NoLocation(); + Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ true); + XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>(); + bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + is_aligned16 ? __ movaps(reg, address) : __ movups(reg, address); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + is_aligned16 ?
__ movapd(reg, address) : __ movupd(reg, address); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecStore(HVecStore* instruction) { + CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ false); +} + +void InstructionCodeGeneratorX86_64::VisitVecStore(HVecStore* instruction) { + Location reg_loc = Location::NoLocation(); + Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ false); + XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>(); + bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + is_aligned16 ? __ movdqa(address, reg) : __ movdqu(address, reg); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + is_aligned16 ? __ movaps(address, reg) : __ movups(address, reg); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +#undef __ + +} // namespace x86_64 +} // namespace art diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 8df513f410..42ed04dfa3 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -16,11 +16,21 @@ #include "loop_optimization.h" +#include "arch/instruction_set.h" +#include "arch/arm/instruction_set_features_arm.h" +#include "arch/arm64/instruction_set_features_arm64.h" +#include "arch/mips/instruction_set_features_mips.h" +#include "arch/mips64/instruction_set_features_mips64.h" +#include "arch/x86/instruction_set_features_x86.h" +#include "arch/x86_64/instruction_set_features_x86_64.h" #include "driver/compiler_driver.h" #include "linear_order.h" namespace art { +// Enables vectorization (SIMDization) in the loop optimizer. +static constexpr bool kEnableVectorization = true; + // Remove the instruction from the graph. A bit more elaborate than the usual // instruction removal, since there may be a cycle in the use structure. static void RemoveFromCycle(HInstruction* instruction) { @@ -53,6 +63,19 @@ static bool IsEarlyExit(HLoopInformation* loop_info) { return false; } +// Test vector restrictions. +static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) { + return (restrictions & tested) != 0; +} + +// Inserts an instruction. +static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) { + DCHECK(block != nullptr); + DCHECK(instruction != nullptr); + block->InsertInstructionBefore(instruction, block->GetLastInstruction()); + return instruction; +} + // // Class methods.
// @@ -64,11 +87,15 @@ HLoopOptimization::HLoopOptimization(HGraph* graph, compiler_driver_(compiler_driver), induction_range_(induction_analysis), loop_allocator_(nullptr), + global_allocator_(graph_->GetArena()), top_loop_(nullptr), last_loop_(nullptr), iset_(nullptr), induction_simplication_count_(0), - simplified_(false) { + simplified_(false), + vector_length_(0), + vector_refs_(nullptr), + vector_map_(nullptr) { } void HLoopOptimization::Run() { @@ -81,15 +108,13 @@ void HLoopOptimization::Run() { // Phase-local allocator that draws from the global pool. Since the allocator // itself resides on the stack, it is destructed on exiting Run(), which // implies its underlying memory is released immediately. - ArenaAllocator allocator(graph_->GetArena()->GetArenaPool()); + ArenaAllocator allocator(global_allocator_->GetArenaPool()); loop_allocator_ = &allocator; // Perform loop optimizations. LocalRun(); - if (top_loop_ == nullptr) { - // All loops have been eliminated. - graph_->SetHasLoops(false); + graph_->SetHasLoops(false); // no more loops } // Detach. @@ -111,18 +136,29 @@ void HLoopOptimization::LocalRun() { } // Traverse the loop hierarchy inner-to-outer and optimize. Traversal can use - // a temporary set that stores instructions using the phase-local allocator. + // temporary data structures using the phase-local allocator. All new HIR + // should use the global allocator. if (top_loop_ != nullptr) { ArenaSet<HInstruction*> iset(loop_allocator_->Adapter(kArenaAllocLoopOptimization)); + ArenaSet<ArrayReference> refs(loop_allocator_->Adapter(kArenaAllocLoopOptimization)); + ArenaSafeMap<HInstruction*, HInstruction*> map( + std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization)); + // Attach. iset_ = &iset; + vector_refs_ = &refs; + vector_map_ = &map; + // Traverse. TraverseLoopsInnerToOuter(top_loop_); - iset_ = nullptr; // detach + // Detach. + iset_ = nullptr; + vector_refs_ = nullptr; + vector_map_ = nullptr; + } } void HLoopOptimization::AddLoop(HLoopInformation* loop_info) { DCHECK(loop_info != nullptr); - LoopNode* node = new (loop_allocator_) LoopNode(loop_info); // phase-local allocator + LoopNode* node = new (loop_allocator_) LoopNode(loop_info); if (last_loop_ == nullptr) { // First loop. DCHECK(top_loop_ == nullptr); @@ -170,7 +206,7 @@ void HLoopOptimization::RemoveLoop(LoopNode* node) { void HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) { for ( ; node != nullptr; node = node->next) { // Visit inner loops first. - int current_induction_simplification_count = induction_simplication_count_; + uint32_t current_induction_simplification_count = induction_simplication_count_; if (node->inner != nullptr) { TraverseLoopsInnerToOuter(node->inner); } @@ -179,7 +215,7 @@ void HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) { if (current_induction_simplification_count != induction_simplication_count_) { induction_range_.ReVisit(node->loop_info); } - // Repeat simplifications in the body of this loop until no more changes occur. + // Repeat simplifications in the loop-body until no more changes occur. // Note that since each simplification consists of eliminating code (without // introducing new code), this process is always finite. do { @@ -187,13 +223,17 @@ void HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) { SimplifyInduction(node); SimplifyBlocks(node); } while (simplified_); - // Simplify inner loop. + // Optimize inner loop.
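+ // (Removal and vectorization below apply to innermost loops only, i.e. nodes + // without an inner child.)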
if (node->inner == nullptr) { - SimplifyInnerLoop(node); + OptimizeInnerLoop(node); } } } +// +// Optimization. +// + void HLoopOptimization::SimplifyInduction(LoopNode* node) { HBasicBlock* header = node->loop_info->GetHeader(); HBasicBlock* preheader = node->loop_info->GetPreHeader(); @@ -204,13 +244,9 @@ void HLoopOptimization::SimplifyInduction(LoopNode* node) { // for (int i = 0; i < 10; i++, k++) { .... no k .... } return k; for (HInstructionIterator it(header->GetPhis()); !it.Done(); it.Advance()) { HPhi* phi = it.Current()->AsPhi(); - iset_->clear(); - int32_t use_count = 0; - if (IsPhiInduction(phi) && - IsOnlyUsedAfterLoop(node->loop_info, phi, /*collect_loop_uses*/ false, &use_count) && - // No uses, or no early-exit with proper replacement. - (use_count == 0 || - (!IsEarlyExit(node->loop_info) && TryReplaceWithLastValue(phi, preheader)))) { + iset_->clear(); // prepare phi induction + if (TrySetPhiInduction(phi, /*restrict_uses*/ true) && + TryAssignLastValue(node->loop_info, phi, preheader, /*collect_loop_uses*/ false)) { for (HInstruction* i : *iset_) { RemoveFromCycle(i); } @@ -256,49 +292,47 @@ void HLoopOptimization::SimplifyBlocks(LoopNode* node) { } } -bool HLoopOptimization::SimplifyInnerLoop(LoopNode* node) { +void HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { HBasicBlock* header = node->loop_info->GetHeader(); HBasicBlock* preheader = node->loop_info->GetPreHeader(); // Ensure loop header logic is finite. - int64_t tc = 0; - if (!induction_range_.IsFinite(node->loop_info, &tc)) { - return false; + int64_t trip_count = 0; + if (!induction_range_.IsFinite(node->loop_info, &trip_count)) { + return; } + // Ensure there is only a single loop-body (besides the header). HBasicBlock* body = nullptr; for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) { if (it.Current() != header) { if (body != nullptr) { - return false; + return; } body = it.Current(); } } // Ensure there is only a single exit point. if (header->GetSuccessors().size() != 2) { - return false; + return; } HBasicBlock* exit = (header->GetSuccessors()[0] == body) ? header->GetSuccessors()[1] : header->GetSuccessors()[0]; // Ensure exit can only be reached by exiting loop. if (exit->GetPredecessors().size() != 1) { - return false; + return; } // Detect either an empty loop (no side effects other than plain iteration) or // a trivial loop (just iterating once). Replace subsequent index uses, if any, // with the last value and remove the loop, possibly after unrolling its body. HInstruction* phi = header->GetFirstPhi(); - iset_->clear(); - int32_t use_count = 0; - if (IsEmptyHeader(header)) { + iset_->clear(); // prepare phi induction + if (TrySetSimpleLoopHeader(header)) { bool is_empty = IsEmptyBody(body); - if ((is_empty || tc == 1) && - IsOnlyUsedAfterLoop(node->loop_info, phi, /*collect_loop_uses*/ true, &use_count) && - // No uses, or proper replacement. - (use_count == 0 || TryReplaceWithLastValue(phi, preheader))) { + if ((is_empty || trip_count == 1) && + TryAssignLastValue(node->loop_info, phi, preheader, /*collect_loop_uses*/ true)) { if (!is_empty) { - // Unroll the loop body, which sees initial value of the index. + // Unroll the loop-body, which sees initial value of the index. 
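+ // (Illustration: a trip-count-one loop for (i = lo; i < lo + 1; i++) S(i) reduces to + // the straight-line code S(lo); the phi is replaced by its initial input and the + // single body is merged into the preheader below.)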
phi->ReplaceWith(phi->InputAt(0)); preheader->MergeInstructionsWith(body); } @@ -308,28 +342,649 @@ bool HLoopOptimization::SimplifyInnerLoop(LoopNode* node) { header->RemoveDominatedBlock(exit); header->DisconnectAndDelete(); preheader->AddSuccessor(exit); - preheader->AddInstruction(new (graph_->GetArena()) HGoto()); // global allocator + preheader->AddInstruction(new (global_allocator_) HGoto()); preheader->AddDominatedBlock(exit); exit->SetDominator(preheader); RemoveLoop(node); // update hierarchy + return; + } + } + + // Vectorize loop, if possible and valid. + if (kEnableVectorization) { + iset_->clear(); // prepare phi induction + if (TrySetSimpleLoopHeader(header) && + CanVectorize(node, body, trip_count) && + TryAssignLastValue(node->loop_info, phi, preheader, /*collect_loop_uses*/ true)) { + Vectorize(node, body, exit, trip_count); + graph_->SetHasSIMD(true); // flag SIMD usage + return; + } + } +} + +// +// Loop vectorization. The implementation is based on the book by Aart J.C. Bik: +// "The Software Vectorization Handbook. Applying Multimedia Extensions for Maximum Performance." +// Intel Press, June, 2004 (http://www.aartbik.com/). +// + +bool HLoopOptimization::CanVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count) { + // Reset vector bookkeeping. + vector_length_ = 0; + vector_refs_->clear(); + vector_runtime_test_a_ = + vector_runtime_test_b_ = nullptr; + + // Phis in the loop-body prevent vectorization. + if (!block->GetPhis().IsEmpty()) { + return false; + } + + // Scan the loop-body, starting a right-hand-side tree traversal at each left-hand-side + // occurrence, which allows passing attributes down the use tree. + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + if (!VectorizeDef(node, it.Current(), /*generate_code*/ false)) { + return false; // failure to vectorize a left-hand-side + } + } + + // Heuristics. Does vectorization seem profitable? + // TODO: refine + if (vector_length_ == 0) { + return false; // nothing found + } else if (0 < trip_count && trip_count < vector_length_) { + return false; // insufficient iterations + } + + // Data dependence analysis. Find each pair of references with same type, where + // at least one is a write. Each such pair denotes a possible data dependence. + // This analysis exploits the property that differently typed arrays cannot be + // aliased, as well as the property that references either point to the same + // array or to two completely disjoint arrays, i.e., no partial aliasing. + // Other than a few simple heuristics, no detailed subscript analysis is done. + for (auto i = vector_refs_->begin(); i != vector_refs_->end(); ++i) { + for (auto j = i; ++j != vector_refs_->end(); ) { + if (i->type == j->type && (i->lhs || j->lhs)) { + // Found same-typed a[i+x] vs. b[i+y], where at least one is a write. + HInstruction* a = i->base; + HInstruction* b = j->base; + HInstruction* x = i->offset; + HInstruction* y = j->offset; + if (a == b) { + // Found a[i+x] vs. a[i+y]. Accept if x == y (loop-independent data dependence). + // Conservatively assume a loop-carried data dependence otherwise, and reject. + if (x != y) { + return false; + } + } else { + // Found a[i+x] vs. b[i+y]. Accept if x == y (at worst loop-independent data dependence). + // Conservatively assume a potential loop-carried data dependence otherwise, avoided by + // generating an explicit a != b disambiguation runtime test on the two references.
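[Editor's note: concretely, the preheader computation this analysis feeds (rounding the trip count down to a multiple of VL, then zeroing it when the two references might alias) behaves like the sketch below; GuardedVectorTripCount is an illustrative stand-in, not a function in this change.]

#include <cassert>
#include <cstdint>

// Round stc down to a multiple of vl, then force all iterations into the
// scalar cleanup loop when the references might alias (HNotEqual + HSelect
// in the generated HIR).
int64_t GuardedVectorTripCount(int64_t stc, int64_t vl, const void* a, const void* b) {
  assert((vl & (vl - 1)) == 0);           // vector_length_ is a power of two
  int64_t vtc = stc - (stc & (vl - 1));   // same value as stc - stc % vl
  return (a != b) ? vtc : 0;
}

int main() {
  int x[100], y[100];
  assert(GuardedVectorTripCount(100, 4, x, y) == 100);  // disjoint: full vector loop
  assert(GuardedVectorTripCount(10, 4, x, y) == 8);     // remainder 2 -> cleanup loop
  assert(GuardedVectorTripCount(100, 4, x, x) == 0);    // aliased: all scalar
  return 0;
}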
+ if (x != y) { + // For now, we reject after one test to avoid excessive overhead. + if (vector_runtime_test_a_ != nullptr) { + return false; + } + vector_runtime_test_a_ = a; + vector_runtime_test_b_ = b; + } + } + } + } + + // Success! + return true; +} + +void HLoopOptimization::Vectorize(LoopNode* node, + HBasicBlock* block, + HBasicBlock* exit, + int64_t trip_count) { + Primitive::Type induc_type = Primitive::kPrimInt; + HBasicBlock* header = node->loop_info->GetHeader(); + HBasicBlock* preheader = node->loop_info->GetPreHeader(); + + // A cleanup is needed for any unknown trip count or for a known trip count + // with remainder iterations after vectorization. + bool needs_cleanup = trip_count == 0 || (trip_count % vector_length_) != 0; + + // Adjust vector bookkeeping. + iset_->clear(); // prepare phi induction + bool is_simple_loop_header = TrySetSimpleLoopHeader(header); // fills iset_ + DCHECK(is_simple_loop_header); + + // Generate preheader: + // stc = <trip-count>; + // vtc = stc - stc % VL; + HInstruction* stc = induction_range_.GenerateTripCount(node->loop_info, graph_, preheader); + HInstruction* vtc = stc; + if (needs_cleanup) { + DCHECK(IsPowerOfTwo(vector_length_)); + HInstruction* rem = Insert( + preheader, new (global_allocator_) HAnd(induc_type, + stc, + graph_->GetIntConstant(vector_length_ - 1))); + vtc = Insert(preheader, new (global_allocator_) HSub(induc_type, stc, rem)); + } + + // Generate runtime disambiguation test: + // vtc = a != b ? vtc : 0; + if (vector_runtime_test_a_ != nullptr) { + HInstruction* rt = Insert( + preheader, + new (global_allocator_) HNotEqual(vector_runtime_test_a_, vector_runtime_test_b_)); + vtc = Insert(preheader, + new (global_allocator_) HSelect(rt, vtc, graph_->GetIntConstant(0), kNoDexPc)); + needs_cleanup = true; + } + + // Generate vector loop: + // for (i = 0; i < vtc; i += VL) + // <vectorized-loop-body> + vector_mode_ = kVector; + GenerateNewLoop(node, + block, + graph_->TransformLoopForVectorization(header, block, exit), + graph_->GetIntConstant(0), + vtc, + graph_->GetIntConstant(vector_length_)); + HLoopInformation* vloop = vector_header_->GetLoopInformation(); + + // Generate cleanup loop, if needed: + // for ( ; i < stc; i += 1) + // <loop-body> + if (needs_cleanup) { + vector_mode_ = kSequential; + GenerateNewLoop(node, + block, + graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit), + vector_phi_, + stc, + graph_->GetIntConstant(1)); + } + + // Remove the original loop by disconnecting the body block + // and removing all instructions from the header. + block->DisconnectAndDelete(); + while (!header->GetFirstInstruction()->IsGoto()) { + header->RemoveInstruction(header->GetFirstInstruction()); + } + // Update loop hierarchy: the old header now resides in the + // same outer loop as the old preheader. + header->SetLoopInformation(preheader->GetLoopInformation()); // outward + node->loop_info = vloop; +} + +void HLoopOptimization::GenerateNewLoop(LoopNode* node, + HBasicBlock* block, + HBasicBlock* new_preheader, + HInstruction* lo, + HInstruction* hi, + HInstruction* step) { + Primitive::Type induc_type = Primitive::kPrimInt; + // Prepare new loop. + vector_map_->clear(); + vector_preheader_ = new_preheader; + vector_header_ = vector_preheader_->GetSingleSuccessor(); + vector_body_ = vector_header_->GetSuccessors()[1]; + vector_phi_ = new (global_allocator_) HPhi(global_allocator_, + kNoRegNumber, + 0, + HPhi::ToPhiType(induc_type)); + // Generate header.
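[Editor's note: the header being generated here, together with the cleanup loop synthesized above, amounts to the following scalar picture; an illustrative C++ rendering for a simple a[i] ^= v body, since the pass of course emits HIR blocks, not source.]

#include <cassert>
#include <cstdint>

// Vector loop stepping by VL, then a cleanup loop that continues from the
// same index (the cleanup loop's lo is the vector loop's exit phi, modeled
// here by reusing i).
void SketchedLoopNest(int8_t* a, int8_t v, int64_t stc, int64_t vl, int64_t vtc) {
  int64_t i = 0;
  for (; i < vtc; i += vl) {    // kVector mode: one HVecLoad/HVecXor/HVecStore
    for (int64_t k = 0; k < vl; k++) a[i + k] ^= v;
  }
  for (; i < stc; i += 1) {     // kSequential mode: scalar HArrayGet/HXor/HArraySet
    a[i] ^= v;
  }
}

int main() {
  int8_t a[10] = {0};
  SketchedLoopNest(a, 1, 10, 4, 8);
  for (int8_t x : a) assert(x == 1);  // all 10 elements updated exactly once
  return 0;
}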
+ // for (i = lo; i < hi; i += step) + // <loop-body> + HInstruction* cond = new (global_allocator_) HAboveOrEqual(vector_phi_, hi); + vector_header_->AddPhi(vector_phi_); + vector_header_->AddInstruction(cond); + vector_header_->AddInstruction(new (global_allocator_) HIf(cond)); + // Suspend check and environment. + HInstruction* suspend = vector_header_->GetFirstInstruction(); + suspend->CopyEnvironmentFromWithLoopPhiAdjustment( + node->loop_info->GetSuspendCheck()->GetEnvironment(), vector_header_); + // Generate body. + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + bool vectorized_def = VectorizeDef(node, it.Current(), /*generate_code*/ true); + DCHECK(vectorized_def); + } + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + auto i = vector_map_->find(it.Current()); + if (i != vector_map_->end() && !i->second->IsInBlock()) { + Insert(vector_body_, i->second); // lays out in original order + if (i->second->NeedsEnvironment()) { + i->second->CopyEnvironmentFromWithLoopPhiAdjustment( + suspend->GetEnvironment(), vector_header_); + } + } + } + // Finalize increment and phi. + HInstruction* inc = new (global_allocator_) HAdd(induc_type, vector_phi_, step); + vector_phi_->AddInput(lo); + vector_phi_->AddInput(Insert(vector_body_, inc)); +} + +// TODO: accept reductions at left-hand-side, mixed-type store idioms, etc. +bool HLoopOptimization::VectorizeDef(LoopNode* node, + HInstruction* instruction, + bool generate_code) { + // Accept a left-hand-side array base[index] for + // (1) supported vector type, + // (2) loop-invariant base, + // (3) unit stride index, + // (4) vectorizable right-hand-side value. + uint64_t restrictions = kNone; + if (instruction->IsArraySet()) { + Primitive::Type type = instruction->AsArraySet()->GetComponentType(); + HInstruction* base = instruction->InputAt(0); + HInstruction* index = instruction->InputAt(1); + HInstruction* value = instruction->InputAt(2); + HInstruction* offset = nullptr; + if (TrySetVectorType(type, &restrictions) && + node->loop_info->IsDefinedOutOfTheLoop(base) && + induction_range_.IsUnitStride(index, &offset) && + VectorizeUse(node, value, generate_code, type, restrictions)) { + if (generate_code) { + GenerateVecSub(index, offset); + GenerateVecMem(instruction, vector_map_->Get(index), vector_map_->Get(value), type); + } else { + vector_refs_->insert(ArrayReference(base, offset, type, /*lhs*/ true)); + } return true; } + return false; + } + // Branch back okay. + if (instruction->IsGoto()) { + return true; + } + // Otherwise accept only expressions with no effects outside the immediate loop-body. + // Note that actual uses are inspected during right-hand-side tree traversal. + return !IsUsedOutsideLoop(node->loop_info, instruction) && !instruction->DoesAnyWrite(); +} + +// TODO: more operations and intrinsics, detect saturation arithmetic, etc. +bool HLoopOptimization::VectorizeUse(LoopNode* node, + HInstruction* instruction, + bool generate_code, + Primitive::Type type, + uint64_t restrictions) { + // Accept anything for which code has already been generated. + if (generate_code) { + if (vector_map_->find(instruction) != vector_map_->end()) { + return true; + } + } + // Continue the right-hand-side tree traversal, passing in proper + // types and vector restrictions along the way. During code generation, + // all new nodes are drawn from the global allocator. 
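[Editor's note: to ground the def-side conditions in VectorizeDef above, here are loop bodies that pass or fail them, written as C++ analogues of the Java/dex input; names are illustrative only.]

// Accepted: a[i] is a unit-stride store with a loop-invariant base, and the
// right-hand side is a vectorizable use tree.
void Accepted(int* a, int x, int n) {
  for (int i = 0; i < n; i++) a[i] = a[i] + x;
}

// Rejected: the index 2*i fails IsUnitStride.
void RejectedStride(int* a, int x, int n) {
  for (int i = 0; i < n; i++) a[2 * i] = x;
}

// Rejected: a non-array write (a field store, in Java terms) is neither an
// ArraySet nor a Goto and DoesAnyWrite(), failing the "no effects outside
// the immediate loop-body" test.
struct S { int sum; };
void RejectedWrite(S* s, int* a, int n) {
  for (int i = 0; i < n; i++) s->sum += a[i];
}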
+ if (node->loop_info->IsDefinedOutOfTheLoop(instruction)) { + // Accept invariant use, using scalar expansion. + if (generate_code) { + GenerateVecInv(instruction, type); + } + return true; + } else if (instruction->IsArrayGet()) { + // Accept a right-hand-side array base[index] for + // (1) exact matching vector type, + // (2) loop-invariant base, + // (3) unit stride index, + // (4) vectorizable right-hand-side value. + HInstruction* base = instruction->InputAt(0); + HInstruction* index = instruction->InputAt(1); + HInstruction* offset = nullptr; + if (type == instruction->GetType() && + node->loop_info->IsDefinedOutOfTheLoop(base) && + induction_range_.IsUnitStride(index, &offset)) { + if (generate_code) { + GenerateVecSub(index, offset); + GenerateVecMem(instruction, vector_map_->Get(index), nullptr, type); + } else { + vector_refs_->insert(ArrayReference(base, offset, type, /*lhs*/ false)); + } + return true; + } + } else if (instruction->IsTypeConversion()) { + // Accept particular type conversions. + HTypeConversion* conversion = instruction->AsTypeConversion(); + HInstruction* opa = conversion->InputAt(0); + Primitive::Type from = conversion->GetInputType(); + Primitive::Type to = conversion->GetResultType(); + if ((to == Primitive::kPrimByte || + to == Primitive::kPrimChar || + to == Primitive::kPrimShort) && from == Primitive::kPrimInt) { + // Accept a "narrowing" type conversion from a "wider" computation for + // (1) conversion into final required type, + // (2) vectorizable operand, + // (3) "wider" operations cannot bring in higher order bits. + if (to == type && VectorizeUse(node, opa, generate_code, type, restrictions | kNoHiBits)) { + if (generate_code) { + if (vector_mode_ == kVector) { + vector_map_->Put(instruction, vector_map_->Get(opa)); // operand pass-through + } else { + GenerateVecOp(instruction, vector_map_->Get(opa), nullptr, type); + } + } + return true; + } + } else if (to == Primitive::kPrimFloat && from == Primitive::kPrimInt) { + DCHECK_EQ(to, type); + // Accept int to float conversion for + // (1) supported int, + // (2) vectorizable operand. + if (TrySetVectorType(from, &restrictions) && + VectorizeUse(node, opa, generate_code, from, restrictions)) { + if (generate_code) { + GenerateVecOp(instruction, vector_map_->Get(opa), nullptr, type); + } + return true; + } + } + return false; + } else if (instruction->IsNeg() || instruction->IsNot() || instruction->IsBooleanNot()) { + // Accept unary operator for vectorizable operand. + HInstruction* opa = instruction->InputAt(0); + if (VectorizeUse(node, opa, generate_code, type, restrictions)) { + if (generate_code) { + GenerateVecOp(instruction, vector_map_->Get(opa), nullptr, type); + } + return true; + } + } else if (instruction->IsAdd() || instruction->IsSub() || + instruction->IsMul() || instruction->IsDiv() || + instruction->IsAnd() || instruction->IsOr() || instruction->IsXor()) { + // Deal with vector restrictions. + if ((instruction->IsMul() && HasVectorRestrictions(restrictions, kNoMul)) || + (instruction->IsDiv() && HasVectorRestrictions(restrictions, kNoDiv))) { + return false; + } + // Accept binary operator for vectorizable operands. 
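[Editor's note: the kNoHiBits restriction threaded through the narrowing-conversion case above exists because narrowed lanes lose the promoted high-order bits; for add, sub, and the logical operators only the low bits of the result matter, but a signed shift right leaks high bits into low ones. A small runnable illustration:]

#include <cassert>
#include <cstdint>

// With byte data promoted to int (Java semantics), an arithmetic shift in
// narrow 8-bit SIMD lanes sees only the low 8 bits, while the scalar result
// depends on the promoted high bits.
int main() {
  int8_t a = -128, b = -128;
  int32_t wide = (a + b) >> 1;          // (-256) >> 1 == -128
  int8_t lanes = (int8_t)(a + b) >> 1;  // lanes keep 8 bits: 0 >> 1 == 0
  assert((int8_t)wide == -128 && lanes == 0);
  return 0;
}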
+ HInstruction* opa = instruction->InputAt(0); + HInstruction* opb = instruction->InputAt(1); + if (VectorizeUse(node, opa, generate_code, type, restrictions) && + VectorizeUse(node, opb, generate_code, type, restrictions)) { + if (generate_code) { + GenerateVecOp(instruction, vector_map_->Get(opa), vector_map_->Get(opb), type); + } + return true; + } + } else if (instruction->IsShl() || instruction->IsShr() || instruction->IsUShr()) { + // Deal with vector restrictions. + if ((HasVectorRestrictions(restrictions, kNoShift)) || + (instruction->IsShr() && HasVectorRestrictions(restrictions, kNoShr))) { + return false; // unsupported instruction + } else if ((instruction->IsShr() || instruction->IsUShr()) && + HasVectorRestrictions(restrictions, kNoHiBits)) { + return false; // hibits may impact lobits; TODO: we can do better! + } + // Accept shift operator for vectorizable/invariant operands. + // TODO: accept symbolic, albeit loop invariant shift factors. + HInstruction* opa = instruction->InputAt(0); + HInstruction* opb = instruction->InputAt(1); + if (VectorizeUse(node, opa, generate_code, type, restrictions) && opb->IsIntConstant()) { + if (generate_code) { + // Make sure shift factor only looks at lower bits, as defined for sequential shifts. + // Note that even the narrower SIMD shifts do the right thing after that. + int32_t mask = (instruction->GetType() == Primitive::kPrimLong) + ? kMaxLongShiftDistance + : kMaxIntShiftDistance; + HInstruction* s = graph_->GetIntConstant(opb->AsIntConstant()->GetValue() & mask); + GenerateVecOp(instruction, vector_map_->Get(opa), s, type); + } + return true; + } + } else if (instruction->IsInvokeStaticOrDirect()) { + // TODO: coming soon. + return false; } return false; } -bool HLoopOptimization::IsPhiInduction(HPhi* phi) { +bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restrictions) { + const InstructionSetFeatures* features = compiler_driver_->GetInstructionSetFeatures(); + switch (compiler_driver_->GetInstructionSet()) { + case kArm: + case kThumb2: + return false; + case kArm64: + // Allow vectorization for all ARM devices, because Android assumes that + // ARMv8 AArch64 always supports advanced SIMD. For now, only D registers + // (64-bit vectors) not Q registers (128-bit vectors). + switch (type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + *restrictions |= kNoDiv; + return TrySetVectorLength(8); + case Primitive::kPrimChar: + case Primitive::kPrimShort: + *restrictions |= kNoDiv; + return TrySetVectorLength(4); + case Primitive::kPrimInt: + *restrictions |= kNoDiv; + return TrySetVectorLength(2); + case Primitive::kPrimFloat: + return TrySetVectorLength(2); + default: + return false; + } + case kX86: + case kX86_64: + // Allow vectorization for SSE4-enabled X86 devices only (128-bit vectors). 
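[Editor's note: the constant masking in the shift case above mirrors Java/dex shift semantics, where only the low 5 bits of an int shift distance count (low 6 for long), so that even narrower SIMD shifts see an in-range distance. A sketch of the sequential semantics being preserved:]

#include <cassert>
#include <cstdint>

// Java-style int shift: the distance is taken modulo 32
// (kMaxIntShiftDistance == 31 in the code above; 63 for long).
int32_t JavaShl(int32_t x, int32_t dist) { return x << (dist & 31); }

int main() {
  assert(JavaShl(1, 33) == 2);  // 33 & 31 == 1, as in Java: 1 << 33 == 2
  assert(JavaShl(1, 32) == 1);  // 32 & 31 == 0
  return 0;
}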
+ if (features->AsX86InstructionSetFeatures()->HasSSE4_1()) { + switch (type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + *restrictions |= kNoMul | kNoDiv | kNoShift; + return TrySetVectorLength(16); + case Primitive::kPrimChar: + case Primitive::kPrimShort: + *restrictions |= kNoDiv; + return TrySetVectorLength(8); + case Primitive::kPrimInt: + *restrictions |= kNoDiv; + return TrySetVectorLength(4); + case Primitive::kPrimLong: + *restrictions |= kNoMul | kNoDiv | kNoShr; + return TrySetVectorLength(2); + case Primitive::kPrimFloat: + return TrySetVectorLength(4); + case Primitive::kPrimDouble: + return TrySetVectorLength(2); + default: + break; + } // switch type + } + return false; + case kMips: + case kMips64: + // TODO: implement MIPS SIMD. + return false; + default: + return false; + } // switch instruction set +} + +bool HLoopOptimization::TrySetVectorLength(uint32_t length) { + DCHECK(IsPowerOfTwo(length) && length >= 2u); + // First time set? + if (vector_length_ == 0) { + vector_length_ = length; + } + // Different types are acceptable within a loop-body, as long as all the corresponding vector + // lengths match exactly to obtain a uniform traversal through the vector iteration space + // (idiomatic exceptions to this rule can be handled by further unrolling sub-expressions). + return vector_length_ == length; +} + +void HLoopOptimization::GenerateVecInv(HInstruction* org, Primitive::Type type) { + if (vector_map_->find(org) == vector_map_->end()) { + // In scalar code, just use a self pass-through for scalar invariants + // (viz. expression remains itself). + if (vector_mode_ == kSequential) { + vector_map_->Put(org, org); + return; + } + // In vector code, explicit scalar expansion is needed. + HInstruction* vector = new (global_allocator_) HVecReplicateScalar( + global_allocator_, org, type, vector_length_); + vector_map_->Put(org, Insert(vector_preheader_, vector)); + } +} + +void HLoopOptimization::GenerateVecSub(HInstruction* org, HInstruction* offset) { + if (vector_map_->find(org) == vector_map_->end()) { + HInstruction* subscript = vector_phi_; + if (offset != nullptr) { + subscript = new (global_allocator_) HAdd(Primitive::kPrimInt, subscript, offset); + if (org->IsPhi()) { + Insert(vector_body_, subscript); // lacks layout placeholder + } + } + vector_map_->Put(org, subscript); + } +} + +void HLoopOptimization::GenerateVecMem(HInstruction* org, + HInstruction* opa, + HInstruction* opb, + Primitive::Type type) { + HInstruction* vector = nullptr; + if (vector_mode_ == kVector) { + // Vector store or load. + if (opb != nullptr) { + vector = new (global_allocator_) HVecStore( + global_allocator_, org->InputAt(0), opa, opb, type, vector_length_); + } else { + vector = new (global_allocator_) HVecLoad( + global_allocator_, org->InputAt(0), opa, type, vector_length_); + } + } else { + // Scalar store or load. 
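[Editor's note: the uniform-lane rule enforced by TrySetVectorLength above can be seen in miniature below: the first packed type in the loop-body fixes vector_length_, and any later type that maps to a different lane count aborts vectorization. Illustrative stand-in types, not ART's.]

#include <cassert>
#include <cstdint>

struct LaneCheck {
  uint32_t vector_length = 0;
  bool TrySet(uint32_t length) {
    if (vector_length == 0) vector_length = length;  // first time set?
    return vector_length == length;
  }
};

int main() {
  LaneCheck loop;           // SSE4.1 units: 16 bytes per vector
  assert(loop.TrySet(16));  // byte -> 16 lanes
  assert(!loop.TrySet(4));  // int  ->  4 lanes: mixed-length loop rejected
  return 0;
}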
+ DCHECK(vector_mode_ == kSequential); + if (opb != nullptr) { + vector = new (global_allocator_) HArraySet(org->InputAt(0), opa, opb, type, kNoDexPc); + } else { + vector = new (global_allocator_) HArrayGet(org->InputAt(0), opa, type, kNoDexPc); + } + } + vector_map_->Put(org, vector); +} + +#define GENERATE_VEC(x, y) \ + if (vector_mode_ == kVector) { \ + vector = (x); \ + } else { \ + DCHECK(vector_mode_ == kSequential); \ + vector = (y); \ + } \ + break; + +void HLoopOptimization::GenerateVecOp(HInstruction* org, + HInstruction* opa, + HInstruction* opb, + Primitive::Type type) { + if (vector_mode_ == kSequential) { + // Scalar code follows implicit integral promotion. + if (type == Primitive::kPrimBoolean || + type == Primitive::kPrimByte || + type == Primitive::kPrimChar || + type == Primitive::kPrimShort) { + type = Primitive::kPrimInt; + } + } + HInstruction* vector = nullptr; + switch (org->GetKind()) { + case HInstruction::kNeg: + DCHECK(opb == nullptr); + GENERATE_VEC( + new (global_allocator_) HVecNeg(global_allocator_, opa, type, vector_length_), + new (global_allocator_) HNeg(type, opa)); + case HInstruction::kNot: + DCHECK(opb == nullptr); + GENERATE_VEC( + new (global_allocator_) HVecNot(global_allocator_, opa, type, vector_length_), + new (global_allocator_) HNot(type, opa)); + case HInstruction::kBooleanNot: + DCHECK(opb == nullptr); + GENERATE_VEC( + new (global_allocator_) HVecNot(global_allocator_, opa, type, vector_length_), + new (global_allocator_) HBooleanNot(opa)); + case HInstruction::kTypeConversion: + DCHECK(opb == nullptr); + GENERATE_VEC( + new (global_allocator_) HVecCnv(global_allocator_, opa, type, vector_length_), + new (global_allocator_) HTypeConversion(type, opa, kNoDexPc)); + case HInstruction::kAdd: + GENERATE_VEC( + new (global_allocator_) HVecAdd(global_allocator_, opa, opb, type, vector_length_), + new (global_allocator_) HAdd(type, opa, opb)); + case HInstruction::kSub: + GENERATE_VEC( + new (global_allocator_) HVecSub(global_allocator_, opa, opb, type, vector_length_), + new (global_allocator_) HSub(type, opa, opb)); + case HInstruction::kMul: + GENERATE_VEC( + new (global_allocator_) HVecMul(global_allocator_, opa, opb, type, vector_length_), + new (global_allocator_) HMul(type, opa, opb)); + case HInstruction::kDiv: + GENERATE_VEC( + new (global_allocator_) HVecDiv(global_allocator_, opa, opb, type, vector_length_), + new (global_allocator_) HDiv(type, opa, opb, kNoDexPc)); + case HInstruction::kAnd: + GENERATE_VEC( + new (global_allocator_) HVecAnd(global_allocator_, opa, opb, type, vector_length_), + new (global_allocator_) HAnd(type, opa, opb)); + case HInstruction::kOr: + GENERATE_VEC( + new (global_allocator_) HVecOr(global_allocator_, opa, opb, type, vector_length_), + new (global_allocator_) HOr(type, opa, opb)); + case HInstruction::kXor: + GENERATE_VEC( + new (global_allocator_) HVecXor(global_allocator_, opa, opb, type, vector_length_), + new (global_allocator_) HXor(type, opa, opb)); + case HInstruction::kShl: + GENERATE_VEC( + new (global_allocator_) HVecShl(global_allocator_, opa, opb, type, vector_length_), + new (global_allocator_) HShl(type, opa, opb)); + case HInstruction::kShr: + GENERATE_VEC( + new (global_allocator_) HVecShr(global_allocator_, opa, opb, type, vector_length_), + new (global_allocator_) HShr(type, opa, opb)); + case HInstruction::kUShr: + GENERATE_VEC( + new (global_allocator_) HVecUShr(global_allocator_, opa, opb, type, vector_length_), + new (global_allocator_) HUShr(type, opa, opb)); + case 
HInstruction::kInvokeStaticOrDirect: { + // TODO: coming soon. + break; + } + default: + break; + } // switch + CHECK(vector != nullptr) << "Unsupported SIMD operator"; + vector_map_->Put(org, vector); +} + +#undef GENERATE_VEC + +// +// Helpers. +// + +bool HLoopOptimization::TrySetPhiInduction(HPhi* phi, bool restrict_uses) { + DCHECK(iset_->empty()); ArenaSet<HInstruction*>* set = induction_range_.LookupCycle(phi); if (set != nullptr) { - DCHECK(iset_->empty()); for (HInstruction* i : *set) { // Check that, other than instructions that are no longer in the graph (removed earlier) - // each instruction is removable and, other than the phi, uses are contained in the cycle. + // each instruction is removable and, when restrict uses are requested, other than for phi, + // all uses are contained within the cycle. if (!i->IsInBlock()) { continue; } else if (!i->IsRemovable()) { return false; - } else if (i != phi) { + } else if (i != phi && restrict_uses) { for (const HUseListNode<HInstruction*>& use : i->GetUses()) { if (set->find(use.GetUser()) == set->end()) { return false; @@ -348,10 +1003,12 @@ bool HLoopOptimization::IsPhiInduction(HPhi* phi) { // c: Condition(phi, bound) // i: If(c) // TODO: Find a less pattern matching approach? -bool HLoopOptimization::IsEmptyHeader(HBasicBlock* block) { +bool HLoopOptimization::TrySetSimpleLoopHeader(HBasicBlock* block) { DCHECK(iset_->empty()); HInstruction* phi = block->GetFirstPhi(); - if (phi != nullptr && phi->GetNext() == nullptr && IsPhiInduction(phi->AsPhi())) { + if (phi != nullptr && + phi->GetNext() == nullptr && + TrySetPhiInduction(phi->AsPhi(), /*restrict_uses*/ false)) { HInstruction* s = block->GetFirstInstruction(); if (s != nullptr && s->IsSuspendCheck()) { HInstruction* c = s->GetNext(); @@ -369,14 +1026,24 @@ bool HLoopOptimization::IsEmptyHeader(HBasicBlock* block) { } bool HLoopOptimization::IsEmptyBody(HBasicBlock* block) { - if (block->GetFirstPhi() == nullptr) { - for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { - HInstruction* instruction = it.Current(); - if (!instruction->IsGoto() && iset_->find(instruction) == iset_->end()) { - return false; - } + if (!block->GetPhis().IsEmpty()) { + return false; + } + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instruction = it.Current(); + if (!instruction->IsGoto() && iset_->find(instruction) == iset_->end()) { + return false; + } + } + return true; +} + +bool HLoopOptimization::IsUsedOutsideLoop(HLoopInformation* loop_info, + HInstruction* instruction) { + for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) { + if (use.GetUser()->GetBlock()->GetLoopInformation() != loop_info) { + return true; } - return true; } return false; } @@ -438,6 +1105,19 @@ bool HLoopOptimization::TryReplaceWithLastValue(HInstruction* instruction, HBasi return false; } +bool HLoopOptimization::TryAssignLastValue(HLoopInformation* loop_info, + HInstruction* instruction, + HBasicBlock* block, + bool collect_loop_uses) { + // Assigning the last value is always successful if there are no uses. + // Otherwise, it succeeds in a no early-exit loop by generating the + // proper last value assignment. 
+ int32_t use_count = 0; + return IsOnlyUsedAfterLoop(loop_info, instruction, collect_loop_uses, &use_count) && + (use_count == 0 || + (!IsEarlyExit(loop_info) && TryReplaceWithLastValue(instruction, block))); +} + void HLoopOptimization::RemoveDeadInstructions(const HInstructionList& list) { for (HBackwardInstructionIterator i(list); !i.Done(); i.Advance()) { HInstruction* instruction = i.Current(); diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index 0b798fc7a9..16f7691af2 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -27,7 +27,8 @@ class CompilerDriver; /** * Loop optimizations. Builds a loop hierarchy and applies optimizations to - * the detected nested loops, such as removal of dead induction and empty loops. + * the detected nested loops, such as removal of dead induction and empty loops + * and inner loop vectorization. */ class HLoopOptimization : public HOptimization { public: @@ -50,34 +51,105 @@ class HLoopOptimization : public HOptimization { inner(nullptr), previous(nullptr), next(nullptr) {} - HLoopInformation* const loop_info; + HLoopInformation* loop_info; LoopNode* outer; LoopNode* inner; LoopNode* previous; LoopNode* next; }; - void LocalRun(); + /* + * Vectorization restrictions (bit mask). + */ + enum VectorRestrictions { + kNone = 0, // no restrictions + kNoMul = 1, // no multiplication + kNoDiv = 2, // no division + kNoShift = 4, // no shift + kNoShr = 8, // no arithmetic shift right + kNoHiBits = 16, // "wider" operations cannot bring in higher order bits + }; + + /* + * Vectorization mode during synthesis + * (sequential peeling/cleanup loop or vector loop). + */ + enum VectorMode { + kSequential, + kVector + }; + + /* + * Representation of a unit-stride array reference. + */ + struct ArrayReference { + ArrayReference(HInstruction* b, HInstruction* o, Primitive::Type t, bool l) + : base(b), offset(o), type(t), lhs(l) { } + bool operator<(const ArrayReference& other) const { + return + (base < other.base) || + (base == other.base && + (offset < other.offset || (offset == other.offset && + (type < other.type || + (type == other.type && lhs < other.lhs))))); + } + HInstruction* base; // base address + HInstruction* offset; // offset + i + Primitive::Type type; // component type + bool lhs; // def/use + }; + // Loop setup and traversal. + void LocalRun(); void AddLoop(HLoopInformation* loop_info); void RemoveLoop(LoopNode* node); - void TraverseLoopsInnerToOuter(LoopNode* node); - // Simplification. + // Optimization. void SimplifyInduction(LoopNode* node); void SimplifyBlocks(LoopNode* node); - bool SimplifyInnerLoop(LoopNode* node); + void OptimizeInnerLoop(LoopNode* node); + + // Vectorization analysis and synthesis. 
+ bool CanVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count); + void Vectorize(LoopNode* node, HBasicBlock* block, HBasicBlock* exit, int64_t trip_count); + void GenerateNewLoop(LoopNode* node, + HBasicBlock* block, + HBasicBlock* new_preheader, + HInstruction* lo, + HInstruction* hi, + HInstruction* step); + bool VectorizeDef(LoopNode* node, HInstruction* instruction, bool generate_code); + bool VectorizeUse(LoopNode* node, + HInstruction* instruction, + bool generate_code, + Primitive::Type type, + uint64_t restrictions); + bool TrySetVectorType(Primitive::Type type, /*out*/ uint64_t* restrictions); + bool TrySetVectorLength(uint32_t length); + void GenerateVecInv(HInstruction* org, Primitive::Type type); + void GenerateVecSub(HInstruction* org, HInstruction* off); + void GenerateVecMem(HInstruction* org, + HInstruction* opa, + HInstruction* opb, + Primitive::Type type); + void GenerateVecOp(HInstruction* org, HInstruction* opa, HInstruction* opb, Primitive::Type type); // Helpers. - bool IsPhiInduction(HPhi* phi); - bool IsEmptyHeader(HBasicBlock* block); + bool TrySetPhiInduction(HPhi* phi, bool restrict_uses); + bool TrySetSimpleLoopHeader(HBasicBlock* block); bool IsEmptyBody(HBasicBlock* block); bool IsOnlyUsedAfterLoop(HLoopInformation* loop_info, HInstruction* instruction, bool collect_loop_uses, /*out*/ int32_t* use_count); + bool IsUsedOutsideLoop(HLoopInformation* loop_info, + HInstruction* instruction); bool TryReplaceWithLastValue(HInstruction* instruction, HBasicBlock* block); + bool TryAssignLastValue(HLoopInformation* loop_info, + HInstruction* instruction, + HBasicBlock* block, + bool collect_loop_uses); void RemoveDeadInstructions(const HInstructionList& list); // Compiler driver (to query ISA features). @@ -90,6 +162,9 @@ class HLoopOptimization : public HOptimization { // through this allocator is immediately released when the loop optimizer is done. ArenaAllocator* loop_allocator_; + // Global heap memory allocator. Used to build HIR. + ArenaAllocator* global_allocator_; + // Entries into the loop hierarchy representation. The hierarchy resides // in phase-local heap memory. LoopNode* top_loop_; @@ -102,11 +177,33 @@ class HLoopOptimization : public HOptimization { // Counter that tracks how many induction cycles have been simplified. Useful // to trigger incremental updates of induction variable analysis of outer loops // when the induction of inner loops has changed. - int32_t induction_simplication_count_; + uint32_t induction_simplication_count_; // Flag that tracks if any simplifications have occurred. bool simplified_; + // Number of "lanes" for selected packed type. + uint32_t vector_length_; + + // Set of array references in the vector loop. + // Contents reside in phase-local heap memory. + ArenaSet<ArrayReference>* vector_refs_; + + // Mapping used during vectorization synthesis for both the scalar peeling/cleanup + // loop (vector_mode_ is kSequential) and the actual vector loop (vector_mode_ is kVector). + // The data structure maps original instructions into the new instructions. + // Contents reside in phase-local heap memory. + ArenaSafeMap<HInstruction*, HInstruction*>* vector_map_; + + // Temporary vectorization bookkeeping.
+ HBasicBlock* vector_preheader_; // preheader of the new loop + HBasicBlock* vector_header_; // header of the new loop + HBasicBlock* vector_body_; // body of the new loop + HInstruction* vector_runtime_test_a_; + HInstruction* vector_runtime_test_b_; // defines a != b runtime test + HPhi* vector_phi_; // the Phi representing the normalized loop index + VectorMode vector_mode_; // selects synthesis mode + friend class LoopOptimizationTest; DISALLOW_COPY_AND_ASSIGN(HLoopOptimization); diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index caada8bccb..5617e4bfcb 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -2340,6 +2340,66 @@ void HGraph::TransformLoopHeaderForBCE(HBasicBlock* header) { new_pre_header, old_pre_header, /* replace_if_back_edge */ false); } +HBasicBlock* HGraph::TransformLoopForVectorization(HBasicBlock* header, + HBasicBlock* body, + HBasicBlock* exit) { + DCHECK(header->IsLoopHeader()); + HLoopInformation* loop = header->GetLoopInformation(); + + // Add new loop blocks. + HBasicBlock* new_pre_header = new (arena_) HBasicBlock(this, header->GetDexPc()); + HBasicBlock* new_header = new (arena_) HBasicBlock(this, header->GetDexPc()); + HBasicBlock* new_body = new (arena_) HBasicBlock(this, header->GetDexPc()); + AddBlock(new_pre_header); + AddBlock(new_header); + AddBlock(new_body); + + // Set up control flow. + header->ReplaceSuccessor(exit, new_pre_header); + new_pre_header->AddSuccessor(new_header); + new_header->AddSuccessor(exit); + new_header->AddSuccessor(new_body); + new_body->AddSuccessor(new_header); + + // Set up dominators. + header->ReplaceDominatedBlock(exit, new_pre_header); + new_pre_header->SetDominator(header); + new_pre_header->dominated_blocks_.push_back(new_header); + new_header->SetDominator(new_pre_header); + new_header->dominated_blocks_.push_back(new_body); + new_body->SetDominator(new_header); + new_header->dominated_blocks_.push_back(exit); + exit->SetDominator(new_header); + + // Fix reverse post order. + size_t index_of_header = IndexOfElement(reverse_post_order_, header); + MakeRoomFor(&reverse_post_order_, 2, index_of_header); + reverse_post_order_[++index_of_header] = new_pre_header; + reverse_post_order_[++index_of_header] = new_header; + size_t index_of_body = IndexOfElement(reverse_post_order_, body); + MakeRoomFor(&reverse_post_order_, 1, index_of_body - 1); + reverse_post_order_[index_of_body] = new_body; + + // Add gotos and suspend check (client must add conditional in header and copy environment). + new_pre_header->AddInstruction(new (arena_) HGoto()); + HSuspendCheck* suspend_check = new (arena_) HSuspendCheck(header->GetDexPc()); + new_header->AddInstruction(suspend_check); + new_body->AddInstruction(new (arena_) HGoto()); + + // Update loop information. 
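[Editor's note: for orientation, the control flow that TransformLoopForVectorization has wired up at this point is sketched below as a comment diagram; the old body and header are dismantled by the caller afterwards.]

// CFG after TransformLoopForVectorization(header, body, exit):
//
//       ... -> header <-> body        (old loop, reused as the vector loop)
//                |
//                v
//          new_pre_header             (returned to the caller)
//                |
//                v
//   +------> new_header ------> exit
//   |            |
//   |            v
//   +-------- new_body               (back edge)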
+ new_header->AddBackEdge(new_body); + new_header->GetLoopInformation()->SetSuspendCheck(suspend_check); + new_header->GetLoopInformation()->Populate(); + new_pre_header->SetLoopInformation(loop->GetPreHeader()->GetLoopInformation()); // outward + HLoopInformationOutwardIterator it(*new_header); + for (it.Advance(); !it.Done(); it.Advance()) { + it.Current()->Add(new_pre_header); + it.Current()->Add(new_header); + it.Current()->Add(new_body); + } + return new_pre_header; +} + static void CheckAgainstUpperBound(ReferenceTypeInfo rti, ReferenceTypeInfo upper_bound_rti) REQUIRES_SHARED(Locks::mutator_lock_) { if (rti.IsValid()) { diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 5f5a28c520..52a02c2285 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -400,6 +400,12 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { // put deoptimization instructions, etc. void TransformLoopHeaderForBCE(HBasicBlock* header); + // Adds a new loop directly after the loop with the given header and exit. + // Returns the new preheader. + HBasicBlock* TransformLoopForVectorization(HBasicBlock* header, + HBasicBlock* body, + HBasicBlock* exit); + // Removes `block` from the graph. Assumes `block` has been disconnected from // other blocks and has no instructions or phis. void DeleteDeadEmptyBlock(HBasicBlock* block); @@ -1363,6 +1369,25 @@ class HLoopInformationOutwardIterator : public ValueObject { M(TypeConversion, Instruction) \ M(UShr, BinaryOperation) \ M(Xor, BinaryOperation) \ + M(VecReplicateScalar, VecUnaryOperation) \ + M(VecSetScalars, VecUnaryOperation) \ + M(VecSumReduce, VecUnaryOperation) \ + M(VecCnv, VecUnaryOperation) \ + M(VecNeg, VecUnaryOperation) \ + M(VecNot, VecUnaryOperation) \ + M(VecAdd, VecBinaryOperation) \ + M(VecSub, VecBinaryOperation) \ + M(VecMul, VecBinaryOperation) \ + M(VecDiv, VecBinaryOperation) \ + M(VecAnd, VecBinaryOperation) \ + M(VecAndNot, VecBinaryOperation) \ + M(VecOr, VecBinaryOperation) \ + M(VecXor, VecBinaryOperation) \ + M(VecShl, VecBinaryOperation) \ + M(VecShr, VecBinaryOperation) \ + M(VecUShr, VecBinaryOperation) \ + M(VecLoad, VecMemoryOperation) \ + M(VecStore, VecMemoryOperation) \ /* * Instructions, shared across several (not all) architectures. @@ -1424,7 +1449,11 @@ class HLoopInformationOutwardIterator : public ValueObject { M(Constant, Instruction) \ M(UnaryOperation, Instruction) \ M(BinaryOperation, Instruction) \ - M(Invoke, Instruction) + M(Invoke, Instruction) \ + M(VecOperation, Instruction) \ + M(VecUnaryOperation, VecOperation) \ + M(VecBinaryOperation, VecOperation) \ + M(VecMemoryOperation, VecOperation) #define FOR_EACH_INSTRUCTION(M) \ FOR_EACH_CONCRETE_INSTRUCTION(M) \ @@ -6689,6 +6718,8 @@ class HParallelMove FINAL : public HTemplateInstruction<0> { } // namespace art +#include "nodes_vector.h" + #if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64) #include "nodes_shared.h" #endif diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h new file mode 100644 index 0000000000..9f9b918f17 --- /dev/null +++ b/compiler/optimizing/nodes_vector.h @@ -0,0 +1,585 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_NODES_VECTOR_H_ +#define ART_COMPILER_OPTIMIZING_NODES_VECTOR_H_ + +// This #include should never be used by compilation, because this header file (nodes_vector.h) +// is included in the header file nodes.h itself. However it gives editing tools better context. +#include "nodes.h" + +namespace art { + +// Memory alignment, represented as an offset relative to a base, where 0 <= offset < base, +// and base is a power of two. For example, the value Alignment(16, 0) means memory is +// perfectly aligned at a 16-byte boundary, whereas the value Alignment(16, 4) means +// memory is always exactly 4 bytes above such a boundary. +class Alignment { + public: + Alignment(size_t base, size_t offset) : base_(base), offset_(offset) { + DCHECK_LT(offset, base); + DCHECK(IsPowerOfTwo(base)); + } + + // Returns true if memory is "at least" aligned at the given boundary. + // Assumes requested base is power of two. + bool IsAlignedAt(size_t base) const { + DCHECK_NE(0u, base); + DCHECK(IsPowerOfTwo(base)); + return ((offset_ | base_) & (base - 1u)) == 0; + } + + std::string ToString() const { + return "ALIGN(" + std::to_string(base_) + "," + std::to_string(offset_) + ")"; + } + + private: + size_t base_; + size_t offset_; +}; + +// +// Definitions of abstract vector operations in HIR. +// + +// Abstraction of a vector operation, i.e., an operation that performs +// GetVectorLength() x GetPackedType() operations simultaneously. +class HVecOperation : public HVariableInputSizeInstruction { + public: + HVecOperation(ArenaAllocator* arena, + Primitive::Type packed_type, + SideEffects side_effects, + size_t number_of_inputs, + size_t vector_length, + uint32_t dex_pc) + : HVariableInputSizeInstruction(side_effects, + dex_pc, + arena, + number_of_inputs, + kArenaAllocVectorNode), + vector_length_(vector_length) { + SetPackedField<TypeField>(packed_type); + DCHECK_LT(1u, vector_length); + } + + // Returns the number of elements packed in a vector. + size_t GetVectorLength() const { + return vector_length_; + } + + // Returns the number of bytes in a full vector. + size_t GetVectorNumberOfBytes() const { + return vector_length_ * Primitive::ComponentSize(GetPackedType()); + } + + // Returns the type of the vector operation: a SIMD operation looks like a FPU location. + // TODO: we could introduce SIMD types in HIR. + Primitive::Type GetType() const OVERRIDE { + return Primitive::kPrimDouble; + } + + // Returns the true component type packed in a vector. + Primitive::Type GetPackedType() const { + return GetPackedField<TypeField>(); + } + + DECLARE_ABSTRACT_INSTRUCTION(VecOperation); + + private: + // Additional packed bits. 
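[Editor's note: the IsAlignedAt predicate in the Alignment class above folds both fields into one test: ((offset | base) & (boundary - 1)) == 0 holds exactly when every address of the form k * base + offset is a multiple of the queried power-of-two boundary. A runnable check, with a free function standing in for the class:]

#include <cassert>
#include <cstddef>

// boundary must be a power of two, as the class DCHECKs for base.
bool IsAlignedAt(size_t base, size_t offset, size_t boundary) {
  return ((offset | base) & (boundary - 1u)) == 0;
}

int main() {
  assert(IsAlignedAt(16, 0, 16));   // ALIGN(16,0): perfectly 16-byte aligned
  assert(IsAlignedAt(16, 0, 8));    // ... hence also 8-byte aligned
  assert(!IsAlignedAt(16, 4, 16));  // ALIGN(16,4): 4 bytes above a boundary
  assert(IsAlignedAt(16, 4, 4));    // ... but still 4-byte aligned
  return 0;
}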
+ static constexpr size_t kFieldType = HInstruction::kNumberOfGenericPackedBits; + static constexpr size_t kFieldTypeSize = + MinimumBitsToStore(static_cast<size_t>(Primitive::kPrimLast)); + static constexpr size_t kNumberOfVectorOpPackedBits = kFieldType + kFieldTypeSize; + static_assert(kNumberOfVectorOpPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); + using TypeField = BitField<Primitive::Type, kFieldType, kFieldTypeSize>; + + const size_t vector_length_; + + DISALLOW_COPY_AND_ASSIGN(HVecOperation); +}; + +// Abstraction of a unary vector operation. +class HVecUnaryOperation : public HVecOperation { + public: + HVecUnaryOperation(ArenaAllocator* arena, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc) + : HVecOperation(arena, + packed_type, + SideEffects::None(), + /*number_of_inputs*/ 1, + vector_length, + dex_pc) { } + DECLARE_ABSTRACT_INSTRUCTION(VecUnaryOperation); + private: + DISALLOW_COPY_AND_ASSIGN(HVecUnaryOperation); +}; + +// Abstraction of a binary vector operation. +class HVecBinaryOperation : public HVecOperation { + public: + HVecBinaryOperation(ArenaAllocator* arena, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc) + : HVecOperation(arena, + packed_type, + SideEffects::None(), + /*number_of_inputs*/ 2, + vector_length, + dex_pc) { } + DECLARE_ABSTRACT_INSTRUCTION(VecBinaryOperation); + private: + DISALLOW_COPY_AND_ASSIGN(HVecBinaryOperation); +}; + +// Abstraction of a vector operation that references memory, with an alignment. +// The Android runtime guarantees at least "component size" alignment for array +// elements and, thus, vectors. +class HVecMemoryOperation : public HVecOperation { + public: + HVecMemoryOperation(ArenaAllocator* arena, + Primitive::Type packed_type, + SideEffects side_effects, + size_t number_of_inputs, + size_t vector_length, + uint32_t dex_pc) + : HVecOperation(arena, packed_type, side_effects, number_of_inputs, vector_length, dex_pc), + alignment_(Primitive::ComponentSize(packed_type), 0) { } + + void SetAlignment(Alignment alignment) { alignment_ = alignment; } + + Alignment GetAlignment() const { return alignment_; } + + DECLARE_ABSTRACT_INSTRUCTION(VecMemoryOperation); + + private: + Alignment alignment_; + + DISALLOW_COPY_AND_ASSIGN(HVecMemoryOperation); +}; + +// +// Definitions of concrete vector operations in HIR. +// + +// Replicates the given scalar into a vector, +// viz. replicate(x) = [ x, .. , x ]. +class HVecReplicateScalar FINAL : public HVecUnaryOperation { + public: + HVecReplicateScalar(ArenaAllocator* arena, + HInstruction* scalar, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc = kNoDexPc) + : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) { + SetRawInputAt(0, scalar); + } + DECLARE_INSTRUCTION(VecReplicateScalar); + private: + DISALLOW_COPY_AND_ASSIGN(HVecReplicateScalar); +}; + +// Assigns the given scalar elements to a vector, +// viz. set( array(x1, .., xn) ) = [ x1, .. , xn ]. 
+class HVecSetScalars FINAL : public HVecUnaryOperation { + public: + HVecSetScalars(ArenaAllocator* arena, + HInstruction** scalars, // array + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc = kNoDexPc) + : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) { + for (size_t i = 0; i < vector_length; i++) { + SetRawInputAt(0, scalars[i]); + } + } + DECLARE_INSTRUCTION(VecSetScalars); + private: + DISALLOW_COPY_AND_ASSIGN(HVecSetScalars); +}; + +// Sum-reduces the given vector into a shorter vector (m < n) or scalar (m = 1), +// viz. sum-reduce[ x1, .. , xn ] = [ y1, .., ym ], where yi = sum_j x_j. +class HVecSumReduce FINAL : public HVecUnaryOperation { + public: + HVecSumReduce(ArenaAllocator* arena, + HInstruction* input, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc = kNoDexPc) + : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) { + DCHECK(input->IsVecOperation()); + DCHECK_EQ(input->AsVecOperation()->GetPackedType(), packed_type); + SetRawInputAt(0, input); + } + + // TODO: probably integral promotion + Primitive::Type GetType() const OVERRIDE { return GetPackedType(); } + + DECLARE_INSTRUCTION(VecSumReduce); + private: + DISALLOW_COPY_AND_ASSIGN(HVecSumReduce); +}; + +// Converts every component in the vector, +// viz. cnv[ x1, .. , xn ] = [ cnv(x1), .. , cnv(xn) ]. +class HVecCnv FINAL : public HVecUnaryOperation { + public: + HVecCnv(ArenaAllocator* arena, + HInstruction* input, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc = kNoDexPc) + : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) { + DCHECK(input->IsVecOperation()); + DCHECK_NE(input->AsVecOperation()->GetPackedType(), packed_type); // actual convert + SetRawInputAt(0, input); + } + + Primitive::Type GetInputType() const { return InputAt(0)->AsVecOperation()->GetPackedType(); } + Primitive::Type GetResultType() const { return GetPackedType(); } + + DECLARE_INSTRUCTION(VecCnv); + + private: + DISALLOW_COPY_AND_ASSIGN(HVecCnv); +}; + +// Negates every component in the vector, +// viz. neg[ x1, .. , xn ] = [ -x1, .. , -xn ]. +class HVecNeg FINAL : public HVecUnaryOperation { + public: + HVecNeg(ArenaAllocator* arena, + HInstruction* input, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc = kNoDexPc) + : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) { + DCHECK(input->IsVecOperation()); + DCHECK_EQ(input->AsVecOperation()->GetPackedType(), packed_type); + SetRawInputAt(0, input); + } + DECLARE_INSTRUCTION(VecNeg); + private: + DISALLOW_COPY_AND_ASSIGN(HVecNeg); +}; + +// Bitwise- or boolean-nots every component in the vector, +// viz. not[ x1, .. , xn ] = [ ~x1, .. , ~xn ], or +// not[ x1, .. , xn ] = [ !x1, .. , !xn ] for boolean. +class HVecNot FINAL : public HVecUnaryOperation { + public: + HVecNot(ArenaAllocator* arena, + HInstruction* input, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc = kNoDexPc) + : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) { + DCHECK(input->IsVecOperation()); + SetRawInputAt(0, input); + } + DECLARE_INSTRUCTION(VecNot); + private: + DISALLOW_COPY_AND_ASSIGN(HVecNot); +}; + +// Adds every component in the two vectors, +// viz. [ x1, .. , xn ] + [ y1, .. , yn ] = [ x1 + y1, .. , xn + yn ].
+class HVecAdd FINAL : public HVecBinaryOperation { + public: + HVecAdd(ArenaAllocator* arena, + HInstruction* left, + HInstruction* right, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc = kNoDexPc) + : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) { + DCHECK(left->IsVecOperation() && right->IsVecOperation()); + DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type); + DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type); + SetRawInputAt(0, left); + SetRawInputAt(1, right); + } + DECLARE_INSTRUCTION(VecAdd); + private: + DISALLOW_COPY_AND_ASSIGN(HVecAdd); +}; + +// Subtracts every component in the two vectors, +// viz. [ x1, .. , xn ] - [ y1, .. , yn ] = [ x1 - y1, .. , xn - yn ]. +class HVecSub FINAL : public HVecBinaryOperation { + public: + HVecSub(ArenaAllocator* arena, + HInstruction* left, + HInstruction* right, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc = kNoDexPc) + : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) { + DCHECK(left->IsVecOperation() && right->IsVecOperation()); + DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type); + DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type); + SetRawInputAt(0, left); + SetRawInputAt(1, right); + } + DECLARE_INSTRUCTION(VecSub); + private: + DISALLOW_COPY_AND_ASSIGN(HVecSub); +}; + +// Multiplies every component in the two vectors, +// viz. [ x1, .. , xn ] * [ y1, .. , yn ] = [ x1 * y1, .. , xn * yn ]. +class HVecMul FINAL : public HVecBinaryOperation { + public: + HVecMul(ArenaAllocator* arena, + HInstruction* left, + HInstruction* right, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc = kNoDexPc) + : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) { + DCHECK(left->IsVecOperation() && right->IsVecOperation()); + DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type); + DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type); + SetRawInputAt(0, left); + SetRawInputAt(1, right); + } + DECLARE_INSTRUCTION(VecMul); + private: + DISALLOW_COPY_AND_ASSIGN(HVecMul); +}; + +// Divides every component in the two vectors, +// viz. [ x1, .. , xn ] / [ y1, .. , yn ] = [ x1 / y1, .. , xn / yn ]. +class HVecDiv FINAL : public HVecBinaryOperation { + public: + HVecDiv(ArenaAllocator* arena, + HInstruction* left, + HInstruction* right, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc = kNoDexPc) + : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) { + DCHECK(left->IsVecOperation() && right->IsVecOperation()); + DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type); + DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type); + SetRawInputAt(0, left); + SetRawInputAt(1, right); + } + DECLARE_INSTRUCTION(VecDiv); + private: + DISALLOW_COPY_AND_ASSIGN(HVecDiv); +}; + +// Bitwise-ands every component in the two vectors, +// viz. [ x1, .. , xn ] & [ y1, .. , yn ] = [ x1 & y1, .. , xn & yn ]. 
+class HVecAnd FINAL : public HVecBinaryOperation { + public: + HVecAnd(ArenaAllocator* arena, + HInstruction* left, + HInstruction* right, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc = kNoDexPc) + : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) { + DCHECK(left->IsVecOperation() && right->IsVecOperation()); + SetRawInputAt(0, left); + SetRawInputAt(1, right); + } + DECLARE_INSTRUCTION(VecAnd); + private: + DISALLOW_COPY_AND_ASSIGN(HVecAnd); +}; + +// Bitwise-and-nots every component in the two vectors, +// viz. [ x1, .. , xn ] and-not [ y1, .. , yn ] = [ ~x1 & y1, .. , ~xn & yn ]. +class HVecAndNot FINAL : public HVecBinaryOperation { + public: + HVecAndNot(ArenaAllocator* arena, + HInstruction* left, + HInstruction* right, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc = kNoDexPc) + : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) { + DCHECK(left->IsVecOperation() && right->IsVecOperation()); + SetRawInputAt(0, left); + SetRawInputAt(1, right); + } + DECLARE_INSTRUCTION(VecAndNot); + private: + DISALLOW_COPY_AND_ASSIGN(HVecAndNot); +}; + +// Bitwise-ors every component in the two vectors, +// viz. [ x1, .. , xn ] | [ y1, .. , yn ] = [ x1 | y1, .. , xn | yn ]. +class HVecOr FINAL : public HVecBinaryOperation { + public: + HVecOr(ArenaAllocator* arena, + HInstruction* left, + HInstruction* right, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc = kNoDexPc) + : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) { + DCHECK(left->IsVecOperation() && right->IsVecOperation()); + SetRawInputAt(0, left); + SetRawInputAt(1, right); + } + DECLARE_INSTRUCTION(VecOr); + private: + DISALLOW_COPY_AND_ASSIGN(HVecOr); +}; + +// Bitwise-xors every component in the two vectors, +// viz. [ x1, .. , xn ] ^ [ y1, .. , yn ] = [ x1 ^ y1, .. , xn ^ yn ]. +class HVecXor FINAL : public HVecBinaryOperation { + public: + HVecXor(ArenaAllocator* arena, + HInstruction* left, + HInstruction* right, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc = kNoDexPc) + : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) { + DCHECK(left->IsVecOperation() && right->IsVecOperation()); + SetRawInputAt(0, left); + SetRawInputAt(1, right); + } + DECLARE_INSTRUCTION(VecXor); + private: + DISALLOW_COPY_AND_ASSIGN(HVecXor); +}; + +// Logically shifts every component in the vector left by the given distance, +// viz. [ x1, .. , xn ] << d = [ x1 << d, .. , xn << d ]. +class HVecShl FINAL : public HVecBinaryOperation { + public: + HVecShl(ArenaAllocator* arena, + HInstruction* left, + HInstruction* right, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc = kNoDexPc) + : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) { + DCHECK(left->IsVecOperation()); + DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type); + SetRawInputAt(0, left); + SetRawInputAt(1, right); + } + DECLARE_INSTRUCTION(VecShl); + private: + DISALLOW_COPY_AND_ASSIGN(HVecShl); +}; + +// Arithmetically shifts every component in the vector right by the given distance, +// viz. [ x1, .. , xn ] >> d = [ x1 >> d, .. , xn >> d ]. 
+class HVecShr FINAL : public HVecBinaryOperation { + public: + HVecShr(ArenaAllocator* arena, + HInstruction* left, + HInstruction* right, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc = kNoDexPc) + : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) { + DCHECK(left->IsVecOperation()); + DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type); + SetRawInputAt(0, left); + SetRawInputAt(1, right); + } + DECLARE_INSTRUCTION(VecShr); + private: + DISALLOW_COPY_AND_ASSIGN(HVecShr); +}; + +// Logically shifts every component in the vector right by the given distance, +// viz. [ x1, .. , xn ] >>> d = [ x1 >>> d, .. , xn >>> d ]. +class HVecUShr FINAL : public HVecBinaryOperation { + public: + HVecUShr(ArenaAllocator* arena, + HInstruction* left, + HInstruction* right, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc = kNoDexPc) + : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) { + DCHECK(left->IsVecOperation()); + DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type); + SetRawInputAt(0, left); + SetRawInputAt(1, right); + } + DECLARE_INSTRUCTION(VecUShr); + private: + DISALLOW_COPY_AND_ASSIGN(HVecUShr); +}; + +// Loads a vector from memory, viz. load(mem, 1) +// yields the vector [ mem(1), .. , mem(n) ]. +class HVecLoad FINAL : public HVecMemoryOperation { + public: + HVecLoad(ArenaAllocator* arena, + HInstruction* base, + HInstruction* index, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc = kNoDexPc) + : HVecMemoryOperation(arena, + packed_type, + SideEffects::ArrayReadOfType(packed_type), + /*number_of_inputs*/ 2, + vector_length, + dex_pc) { + SetRawInputAt(0, base); + SetRawInputAt(1, index); + } + DECLARE_INSTRUCTION(VecLoad); + private: + DISALLOW_COPY_AND_ASSIGN(HVecLoad); +}; + +// Stores a vector to memory, viz. store(mem, 1, [x1, .. , xn] ) +// sets mem(1) = x1, .. , mem(n) = xn. +class HVecStore FINAL : public HVecMemoryOperation { + public: + HVecStore(ArenaAllocator* arena, + HInstruction* base, + HInstruction* index, + HInstruction* value, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc = kNoDexPc) + : HVecMemoryOperation(arena, + packed_type, + SideEffects::ArrayWriteOfType(packed_type), + /*number_of_inputs*/ 3, + vector_length, + dex_pc) { + DCHECK(value->IsVecOperation()); + DCHECK_EQ(value->AsVecOperation()->GetPackedType(), packed_type); + SetRawInputAt(0, base); + SetRawInputAt(1, index); + SetRawInputAt(2, value); + } + DECLARE_INSTRUCTION(VecStore); + private: + DISALLOW_COPY_AND_ASSIGN(HVecStore); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_NODES_VECTOR_H_ diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index 36ee5a903a..b538a89a06 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -470,7 +470,12 @@ bool LiveInterval::SameRegisterKind(Location other) const { } size_t LiveInterval::NumberOfSpillSlotsNeeded() const { - // TODO: detect vector operation. + // For a SIMD operation, compute the number of needed spill slots. + // TODO: do through vector type? + HInstruction* definition = GetParent()->GetDefinedBy(); + if (definition != nullptr && definition->IsVecOperation()) { + return definition->AsVecOperation()->GetVectorNumberOfBytes() / kVRegSize; + } // Return number of needed spill slots based on type. return (type_ == Primitive::kPrimLong || type_ == Primitive::kPrimDouble) ?
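[Editor's note: the NumberOfSpillSlotsNeeded change just above sizes SIMD spill space in vreg units; assuming ART's 4-byte vregs, a 128-bit vector needs four slots where a scalar needs one or two. Worked numbers, with kVRegSize mirrored locally:]

#include <cassert>
#include <cstddef>

constexpr size_t kVRegSize = 4;  // vreg size in bytes, as in ART

// GetVectorNumberOfBytes() / kVRegSize, spelled out.
size_t SpillSlotsForVector(size_t vector_length, size_t component_size) {
  return vector_length * component_size / kVRegSize;
}

int main() {
  assert(SpillSlotsForVector(4, 4) == 4);   // 4 x int (SSE, 128-bit)     -> 4 slots
  assert(SpillSlotsForVector(2, 4) == 2);   // 2 x int (NEON D reg, 64-bit) -> 2 slots
  assert(SpillSlotsForVector(16, 1) == 4);  // 16 x byte (128-bit)        -> 4 slots
  return 0;
}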
2 : 1; } diff --git a/test/640-checker-boolean-simd/expected.txt b/test/640-checker-boolean-simd/expected.txt new file mode 100644 index 0000000000..b0aad4deb5 --- /dev/null +++ b/test/640-checker-boolean-simd/expected.txt @@ -0,0 +1 @@ +passed diff --git a/test/640-checker-boolean-simd/info.txt b/test/640-checker-boolean-simd/info.txt new file mode 100644 index 0000000000..c9c6d5ed9f --- /dev/null +++ b/test/640-checker-boolean-simd/info.txt @@ -0,0 +1 @@ +Functional tests on SIMD vectorization. diff --git a/test/640-checker-boolean-simd/src/Main.java b/test/640-checker-boolean-simd/src/Main.java new file mode 100644 index 0000000000..f8239faaf3 --- /dev/null +++ b/test/640-checker-boolean-simd/src/Main.java @@ -0,0 +1,136 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Functional tests for SIMD vectorization. + */ +public class Main { + + static boolean[] a; + + // + // Arithmetic operations. + // + + /// CHECK-START: void Main.and(boolean) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.and(boolean) loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecAnd loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none + static void and(boolean x) { + for (int i = 0; i < 128; i++) + a[i] &= x; // NOTE: bitwise and, not the common && + } + + /// CHECK-START: void Main.or(boolean) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.or(boolean) loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecOr loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none + static void or(boolean x) { + for (int i = 0; i < 128; i++) + a[i] |= x; // NOTE: bitwise or, not the common || + } + + /// CHECK-START: void Main.xor(boolean) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.xor(boolean) loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecXor loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none + static void xor(boolean x) { + for (int i = 0; i < 128; i++) + a[i] ^= x; // NOTE: bitwise xor + } + + /// CHECK-START: void Main.not() loop_optimization (before) + /// CHECK-DAG: Phi 
loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.not() loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecNot loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none + static void not() { + for (int i = 0; i < 128; i++) + a[i] = !a[i]; + } + + // + // Test Driver. + // + + public static void main(String[] args) { + // Set up. + a = new boolean[128]; + for (int i = 0; i < 128; i++) { + a[i] = (i & 1) == 0; + } + // Arithmetic operations. + and(true); + for (int i = 0; i < 128; i++) { + expectEquals((i & 1) == 0, a[i], "and-true"); + } + xor(true); + for (int i = 0; i < 128; i++) { + expectEquals((i & 1) != 0, a[i], "xor-true"); + } + xor(false); + for (int i = 0; i < 128; i++) { + expectEquals((i & 1) != 0, a[i], "xor-false"); + } + not(); + for (int i = 0; i < 128; i++) { + expectEquals((i & 1) == 0, a[i], "not"); + } + or(true); + for (int i = 0; i < 128; i++) { + expectEquals(true, a[i], "or-true"); + } + and(false); + for (int i = 0; i < 128; i++) { + expectEquals(false, a[i], "and-false"); + } + or(false); + for (int i = 0; i < 128; i++) { + expectEquals(false, a[i], "or-false"); + } + // Done. + System.out.println("passed"); + } + + private static void expectEquals(boolean expected, boolean result, String action) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result + " for " + action); + } + } +} diff --git a/test/640-checker-byte-simd/expected.txt b/test/640-checker-byte-simd/expected.txt new file mode 100644 index 0000000000..b0aad4deb5 --- /dev/null +++ b/test/640-checker-byte-simd/expected.txt @@ -0,0 +1 @@ +passed diff --git a/test/640-checker-byte-simd/info.txt b/test/640-checker-byte-simd/info.txt new file mode 100644 index 0000000000..c9c6d5ed9f --- /dev/null +++ b/test/640-checker-byte-simd/info.txt @@ -0,0 +1 @@ +Functional tests on SIMD vectorization. diff --git a/test/640-checker-byte-simd/src/Main.java b/test/640-checker-byte-simd/src/Main.java new file mode 100644 index 0000000000..0f7452b045 --- /dev/null +++ b/test/640-checker-byte-simd/src/Main.java @@ -0,0 +1,277 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Functional tests for SIMD vectorization. + */ +public class Main { + + static byte[] a; + + // + // Arithmetic operations. 
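Note: each test method in these files carries paired checker blocks: before loop_optimization the loop must still be scalar (ArrayGet/ArraySet under the loop Phi), and after it, on ARM64, the same loop must show packed VecLoad/VecAdd/VecStore operations. Conceptually the vectorizer re-steps the loop by the lane count; a hedged sketch of that shape, where 16 byte lanes per vector is an assumption rather than something this commit pins down:

    static void addVectorized(byte[] a, int x) {
      final int LANES = 16;  // assumed lanes per 128-bit vector for byte data
      for (int i = 0; i < a.length; i += LANES) {  // a.length is 128 in these tests
        for (int j = 0; j < LANES; j++) {
          a[i + j] += x;  // the inner loop stands in for one VecLoad/VecAdd/VecStore
        }
      }
    }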
+ // + + /// CHECK-START: void Main.add(int) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.add(int) loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecAdd loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none + static void add(int x) { + for (int i = 0; i < 128; i++) + a[i] += x; + } + + /// CHECK-START: void Main.sub(int) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.sub(int) loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecSub loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none + static void sub(int x) { + for (int i = 0; i < 128; i++) + a[i] -= x; + } + + /// CHECK-START: void Main.mul(int) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.mul(int) loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecMul loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none + static void mul(int x) { + for (int i = 0; i < 128; i++) + a[i] *= x; + } + + /// CHECK-START: void Main.div(int) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START: void Main.div(int) loop_optimization (after) + // + // Not supported on any architecture. 
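Note: the scalar fallback is expected here because neither SSE/AVX nor NEON provides a packed integer divide. Division also cannot be rewritten blindly as an arithmetic shift: Java's / rounds toward zero while >> rounds toward negative infinity, so the two disagree on negative odd values:

    int x = -3;
    System.out.println(x / 2);   // -1, rounds toward zero
    System.out.println(x >> 1);  // -2, rounds toward negative infinity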
+ // + static void div(int x) { + for (int i = 0; i < 128; i++) + a[i] /= x; + } + + /// CHECK-START: void Main.neg() loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.neg() loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecNeg loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none + static void neg() { + for (int i = 0; i < 128; i++) + a[i] = (byte) -a[i]; + } + + /// CHECK-START: void Main.not() loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.not() loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecNot loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none + static void not() { + for (int i = 0; i < 128; i++) + a[i] = (byte) ~a[i]; + } + + /// CHECK-START: void Main.shl4() loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.shl4() loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecShl loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none + static void shl4() { + for (int i = 0; i < 128; i++) + a[i] <<= 4; + } + + /// CHECK-START: void Main.sar2() loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.sar2() loop_optimization (after) + // + // TODO: fill in when supported + static void sar2() { + for (int i = 0; i < 128; i++) + a[i] >>= 2; + } + + /// CHECK-START: void Main.shr2() loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.shr2() loop_optimization (after) + // + // TODO: fill in when supported + static void shr2() { + for (int i = 0; i < 128; i++) + a[i] >>>= 2; + } + + // + // Shift sanity. + // + + static void sar31() { + for (int i = 0; i < 128; i++) + a[i] >>= 31; + } + + static void shr31() { + for (int i = 0; i < 128; i++) + a[i] >>>= 31; + } + + static void shr32() { + for (int i = 0; i < 128; i++) + a[i] >>>= 32; // 0, since & 31 + } + + static void shr33() { + for (int i = 0; i < 128; i++) + a[i] >>>= 33; // 1, since & 31 + } + + // + // Loop bounds. + // + + static void bounds() { + for (int i = 1; i < 127; i++) + a[i] += 11; + } + + // + // Test Driver. + // + + public static void main(String[] args) { + // Set up. + a = new byte[128]; + for (int i = 0; i < 128; i++) { + a[i] = (byte) i; + } + // Arithmetic operations. 
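Note: the driver below leans on Java's promote-then-narrow byte arithmetic: a compound assignment such as a[i] += x widens to int, operates, and implicitly narrows back, so values wrap mod 256. That is why mul(2) is checked against (byte)(i + i) and why the div check resets the array to undo the wrap-around:

    byte b = 127;
    b += 2;                 // int add, then implicit (byte) narrowing
    System.out.println(b);  // -127, wrapped around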
+ add(2); + for (int i = 0; i < 128; i++) { + expectEquals((byte)(i + 2), a[i], "add"); + } + sub(2); + for (int i = 0; i < 128; i++) { + expectEquals(i, a[i], "sub"); + } + mul(2); + for (int i = 0; i < 128; i++) { + expectEquals((byte)(i + i), a[i], "mul"); + } + div(2); + for (int i = 0; i < 128; i++) { + expectEquals(((byte)(i + i)) >> 1, a[i], "div"); + a[i] = (byte) i; // undo arithmetic wrap-around effects + } + neg(); + for (int i = 0; i < 128; i++) { + expectEquals(-i, a[i], "neg"); + } + // Loop bounds. + bounds(); + expectEquals(0, a[0], "bounds0"); + for (int i = 1; i < 127; i++) { + expectEquals(11 - i, a[i], "bounds"); + } + expectEquals(-127, a[127], "bounds127"); + // Shifts. + for (int i = 0; i < 128; i++) { + a[i] = (byte) 0xff; + } + shl4(); + for (int i = 0; i < 128; i++) { + expectEquals((byte) 0xf0, a[i], "shl4"); + } + sar2(); + for (int i = 0; i < 128; i++) { + expectEquals((byte) 0xfc, a[i], "sar2"); + } + shr2(); + for (int i = 0; i < 128; i++) { + expectEquals((byte) 0xff, a[i], "shr2"); // sic! + } + sar31(); + for (int i = 0; i < 128; i++) { + expectEquals((byte) 0xff, a[i], "sar31"); + } + shr31(); + for (int i = 0; i < 128; i++) { + expectEquals(0x01, a[i], "shr31"); + a[i] = (byte) 0x12; // reset + } + shr32(); + for (int i = 0; i < 128; i++) { + expectEquals((byte) 0x12, a[i], "shr32"); + } + shr33(); + for (int i = 0; i < 128; i++) { + expectEquals((byte) 0x09, a[i], "shr33"); + a[i] = (byte) 0xf0; // reset + } + not(); + for (int i = 0; i < 128; i++) { + expectEquals((byte) 0x0f, a[i], "not"); + } + // Done. + System.out.println("passed"); + } + + private static void expectEquals(int expected, int result, String action) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result + " for " + action); + } + } +} diff --git a/test/640-checker-char-simd/expected.txt b/test/640-checker-char-simd/expected.txt new file mode 100644 index 0000000000..b0aad4deb5 --- /dev/null +++ b/test/640-checker-char-simd/expected.txt @@ -0,0 +1 @@ +passed diff --git a/test/640-checker-char-simd/info.txt b/test/640-checker-char-simd/info.txt new file mode 100644 index 0000000000..c9c6d5ed9f --- /dev/null +++ b/test/640-checker-char-simd/info.txt @@ -0,0 +1 @@ +Functional tests on SIMD vectorization. diff --git a/test/640-checker-char-simd/src/Main.java b/test/640-checker-char-simd/src/Main.java new file mode 100644 index 0000000000..0628b36003 --- /dev/null +++ b/test/640-checker-char-simd/src/Main.java @@ -0,0 +1,278 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Functional tests for SIMD vectorization. + */ +public class Main { + + static char[] a; + + // + // Arithmetic operations. 
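Note: char is Java's only unsigned integral type and widens to int by zero extension, which shapes several checks in this file: >> over a char never sees a sign bit, so the driver expects sar31() to zero the array instead of smearing ones:

    char c = 0xffff;
    System.out.println(c >> 31);  // 0, since c widens to 0x0000ffff
    System.out.println(c >>> 2);  // 16383, i.e. 0x3fff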
+ // + + /// CHECK-START: void Main.add(int) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.add(int) loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecAdd loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none + static void add(int x) { + for (int i = 0; i < 128; i++) + a[i] += x; + } + + /// CHECK-START: void Main.sub(int) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.sub(int) loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecSub loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none + static void sub(int x) { + for (int i = 0; i < 128; i++) + a[i] -= x; + } + + /// CHECK-START: void Main.mul(int) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.mul(int) loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecMul loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none + static void mul(int x) { + for (int i = 0; i < 128; i++) + a[i] *= x; + } + + /// CHECK-START: void Main.div(int) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START: void Main.div(int) loop_optimization (after) + // + // Not supported on any architecture. 
+ // + static void div(int x) { + for (int i = 0; i < 128; i++) + a[i] /= x; + } + + /// CHECK-START: void Main.neg() loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.neg() loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecNeg loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none + static void neg() { + for (int i = 0; i < 128; i++) + a[i] = (char) -a[i]; + } + + /// CHECK-START: void Main.not() loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.not() loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecNot loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none + static void not() { + for (int i = 0; i < 128; i++) + a[i] = (char) ~a[i]; + } + + /// CHECK-START: void Main.shl4() loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.shl4() loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecShl loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none + static void shl4() { + for (int i = 0; i < 128; i++) + a[i] <<= 4; + } + + /// CHECK-START: void Main.sar2() loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.sar2() loop_optimization (after) + // + // TODO: fill in when supported + static void sar2() { + for (int i = 0; i < 128; i++) + a[i] >>= 2; + } + + /// CHECK-START: void Main.shr2() loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.shr2() loop_optimization (after) + // + // TODO: fill in when supported + static void shr2() { + for (int i = 0; i < 128; i++) + a[i] >>>= 2; + } + + // + // Shift sanity. + // + + static void sar31() { + for (int i = 0; i < 128; i++) + a[i] >>= 31; + } + + static void shr31() { + for (int i = 0; i < 128; i++) + a[i] >>>= 31; + } + + static void shr32() { + for (int i = 0; i < 128; i++) + a[i] >>>= 32; // 0, since & 31 + } + + static void shr33() { + for (int i = 0; i < 128; i++) + a[i] >>>= 33; // 1, since & 31 + } + + // + // Loop bounds. + // + + static void bounds() { + for (int i = 1; i < 127; i++) + a[i] += 11; + } + + // + // Test Driver. + // + + public static void main(String[] args) { + // Set up. + a = new char[128]; + for (int i = 0; i < 128; i++) { + a[i] = (char) i; + } + // Arithmetic operations. 
+ add(2); + for (int i = 0; i < 128; i++) { + expectEquals(i + 2, a[i], "add"); + } + sub(2); + for (int i = 0; i < 128; i++) { + expectEquals(i, a[i], "sub"); + } + mul(2); + for (int i = 0; i < 128; i++) { + expectEquals(i + i, a[i], "mul"); + } + div(2); + for (int i = 0; i < 128; i++) { + expectEquals(i, a[i], "div"); + } + neg(); + for (int i = 0; i < 128; i++) { + expectEquals((char)-i, a[i], "neg"); + } + // Loop bounds. + bounds(); + expectEquals(0, a[0], "bounds0"); + for (int i = 1; i < 127; i++) { + expectEquals((char)(11 - i), a[i], "bounds"); + } + expectEquals((char)-127, a[127], "bounds127"); + // Shifts. + for (int i = 0; i < 128; i++) { + a[i] = (char) 0xffff; + } + shl4(); + for (int i = 0; i < 128; i++) { + expectEquals((char) 0xfff0, a[i], "shl4"); + } + sar2(); + for (int i = 0; i < 128; i++) { + expectEquals((char) 0x3ffc, a[i], "sar2"); + } + shr2(); + for (int i = 0; i < 128; i++) { + expectEquals((char) 0x0fff, a[i], "shr2"); + a[i] = (char) 0xffff; // reset + } + sar31(); + for (int i = 0; i < 128; i++) { + expectEquals(0, a[i], "sar31"); + a[i] = (char) 0xffff; // reset + } + shr31(); + for (int i = 0; i < 128; i++) { + expectEquals(0, a[i], "shr31"); + a[i] = (char) 0x1200; // reset + } + shr32(); + for (int i = 0; i < 128; i++) { + expectEquals((char) 0x1200, a[i], "shr32"); + } + shr33(); + for (int i = 0; i < 128; i++) { + expectEquals((char) 0x0900, a[i], "shr33"); + a[i] = (char) 0xf1f0; // reset + } + not(); + for (int i = 0; i < 128; i++) { + expectEquals((char) 0x0e0f, a[i], "not"); + } + // Done. + System.out.println("passed"); + } + + private static void expectEquals(int expected, int result, String action) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result + " for " + action); + } + } +} diff --git a/test/640-checker-double-simd/expected.txt b/test/640-checker-double-simd/expected.txt new file mode 100644 index 0000000000..b0aad4deb5 --- /dev/null +++ b/test/640-checker-double-simd/expected.txt @@ -0,0 +1 @@ +passed diff --git a/test/640-checker-double-simd/info.txt b/test/640-checker-double-simd/info.txt new file mode 100644 index 0000000000..c9c6d5ed9f --- /dev/null +++ b/test/640-checker-double-simd/info.txt @@ -0,0 +1 @@ +Functional tests on SIMD vectorization. diff --git a/test/640-checker-double-simd/src/Main.java b/test/640-checker-double-simd/src/Main.java new file mode 100644 index 0000000000..43f65f1792 --- /dev/null +++ b/test/640-checker-double-simd/src/Main.java @@ -0,0 +1,195 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Functional tests for SIMD vectorization. Note that this class provides a mere + * functional test, not a precise numerical verifier. + */ +public class Main { + + static double[] a; + + // + // Arithmetic operations. 
+ // + + /// CHECK-START: void Main.add(double) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.add(double) loop_optimization (after) + // + // TODO: fill in when supported + static void add(double x) { + for (int i = 0; i < 128; i++) + a[i] += x; + } + + /// CHECK-START: void Main.sub(double) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.sub(double) loop_optimization (after) + // + // TODO: fill in when supported + static void sub(double x) { + for (int i = 0; i < 128; i++) + a[i] -= x; + } + + /// CHECK-START: void Main.mul(double) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.mul(double) loop_optimization (after) + // + // TODO: fill in when supported + static void mul(double x) { + for (int i = 0; i < 128; i++) + a[i] *= x; + } + + /// CHECK-START: void Main.div(double) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.div(double) loop_optimization (after) + // + // TODO: fill in when supported + static void div(double x) { + for (int i = 0; i < 128; i++) + a[i] /= x; + } + + /// CHECK-START: void Main.neg() loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.neg() loop_optimization (after) + // + // TODO: fill in when supported + static void neg() { + for (int i = 0; i < 128; i++) + a[i] = -a[i]; + } + + /// CHECK-START: void Main.abs() loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.abs() loop_optimization (after) + // + // TODO: fill in when supported + static void abs() { + for (int i = 0; i < 128; i++) + a[i] = Math.abs(a[i]); + } + + /// CHECK-START: void Main.conv(long[]) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.conv(long[]) loop_optimization (after) + // + // TODO: fill in when supported + static void conv(long[] b) { + for (int i = 0; i < 128; i++) + a[i] = b[i]; + } + + // + // Loop bounds. + // + + static void bounds() { + for (int i = 1; i < 127; i++) + a[i] += 11; + } + + // + // Test Driver. + // + + public static void main(String[] args) { + // Set up. + a = new double[128]; + for (int i = 0; i < 128; i++) { + a[i] = i; + } + // Arithmetic operations. 
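Note: the class comment above calls this a functional rather than numerical test, yet the exact == comparison in expectEquals is still safe here: every value involved (small integers, the factor 2.0, 1000.0 * i) is exactly representable in binary floating point. The same style of check would break for non-dyadic constants:

    System.out.println(64.0 / 2.0 == 32.0);  // true, powers of two are exact
    System.out.println(0.1 + 0.2 == 0.3);    // false, none of these are exact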
+ add(2.0); + for (int i = 0; i < 128; i++) { + expectEquals(i + 2, a[i], "add"); + } + sub(2.0); + for (int i = 0; i < 128; i++) { + expectEquals(i, a[i], "sub"); + } + mul(2.0); + for (int i = 0; i < 128; i++) { + expectEquals(i + i, a[i], "mul"); + } + div(2.0); + for (int i = 0; i < 128; i++) { + expectEquals(i, a[i], "div"); + } + neg(); + for (int i = 0; i < 128; i++) { + expectEquals(-i, a[i], "neg"); + } + // Loop bounds. + bounds(); + expectEquals(0, a[0], "bounds0"); + for (int i = 1; i < 127; i++) { + expectEquals(11 - i, a[i], "bounds"); + } + expectEquals(-127, a[127], "bounds127"); + // Abs. + abs(); + expectEquals(0, a[0], "abs0"); + for (int i = 1; i <= 11; i++) { + expectEquals(11 - i, a[i], "abs_lo"); + } + for (int i = 12; i < 127; i++) { + expectEquals(i - 11, a[i], "abs_hi"); + } + expectEquals(127, a[127], "abs127"); + // Conversion. + long[] b = new long[128]; + for (int i = 0; i < 128; i++) { + b[i] = 1000 * i; + } + conv(b); + for (int i = 1; i < 127; i++) { + expectEquals(1000.0 * i, a[i], "conv"); + } + // Done. + System.out.println("passed"); + } + + private static void expectEquals(double expected, double result, String action) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result + " for " + action); + } + } +} diff --git a/test/640-checker-float-simd/expected.txt b/test/640-checker-float-simd/expected.txt new file mode 100644 index 0000000000..b0aad4deb5 --- /dev/null +++ b/test/640-checker-float-simd/expected.txt @@ -0,0 +1 @@ +passed diff --git a/test/640-checker-float-simd/info.txt b/test/640-checker-float-simd/info.txt new file mode 100644 index 0000000000..c9c6d5ed9f --- /dev/null +++ b/test/640-checker-float-simd/info.txt @@ -0,0 +1 @@ +Functional tests on SIMD vectorization. diff --git a/test/640-checker-float-simd/src/Main.java b/test/640-checker-float-simd/src/Main.java new file mode 100644 index 0000000000..80c3112b6a --- /dev/null +++ b/test/640-checker-float-simd/src/Main.java @@ -0,0 +1,207 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Functional tests for SIMD vectorization. Note that this class provides a mere + * functional test, not a precise numerical verifier. + */ +public class Main { + + static float[] a; + + // + // Arithmetic operations. 
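Note: unlike the double file above, the float checks do expect ARM64 vector code for add/sub/mul/div, which matches AArch64 NEON having packed floating-point arithmetic including FDIV. The conv() test further down converts int to float exactly only because its inputs (1000 * i, at most 127000) stay below 2^24, the limit of float's 24-bit significand:

    int big = (1 << 24) + 1;      // 16777217
    float f = big;                // rounded to the nearest float
    System.out.println((int) f);  // 16777216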
+ // + + /// CHECK-START: void Main.add(float) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.add(float) loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecAdd loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none + static void add(float x) { + for (int i = 0; i < 128; i++) + a[i] += x; + } + + /// CHECK-START: void Main.sub(float) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.sub(float) loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecSub loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none + static void sub(float x) { + for (int i = 0; i < 128; i++) + a[i] -= x; + } + + /// CHECK-START: void Main.mul(float) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.mul(float) loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecMul loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none + static void mul(float x) { + for (int i = 0; i < 128; i++) + a[i] *= x; + } + + /// CHECK-START: void Main.div(float) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.div(float) loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDiv loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none + static void div(float x) { + for (int i = 0; i < 128; i++) + a[i] /= x; + } + + /// CHECK-START: void Main.neg() loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.neg() loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecNeg loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none + static void neg() { + for (int i = 0; i < 128; i++) + a[i] = -a[i]; + } + + /// CHECK-START: void Main.abs() loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.abs() loop_optimization (after) + // + // TODO: fill in when supported + static void abs() { + for (int i = 0; i < 128; i++) + a[i] = Math.abs(a[i]); + } + + /// CHECK-START: void Main.conv(int[]) loop_optimization (before) + 
/// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.conv(int[]) loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecCnv loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none + static void conv(int[] b) { + for (int i = 0; i < 128; i++) + a[i] = b[i]; + } + + // + // Loop bounds. + // + + static void bounds() { + for (int i = 1; i < 127; i++) + a[i] += 11; + } + + // + // Test Driver. + // + + public static void main(String[] args) { + // Set up. + a = new float[128]; + for (int i = 0; i < 128; i++) { + a[i] = i; + } + // Arithmetic operations. + add(2.0f); + for (int i = 0; i < 128; i++) { + expectEquals(i + 2, a[i], "add"); + } + sub(2.0f); + for (int i = 0; i < 128; i++) { + expectEquals(i, a[i], "sub"); + } + mul(2.0f); + for (int i = 0; i < 128; i++) { + expectEquals(i + i, a[i], "mul"); + } + div(2.0f); + for (int i = 0; i < 128; i++) { + expectEquals(i, a[i], "div"); + } + neg(); + for (int i = 0; i < 128; i++) { + expectEquals(-i, a[i], "neg"); + } + // Loop bounds. + bounds(); + expectEquals(0, a[0], "bounds0"); + for (int i = 1; i < 127; i++) { + expectEquals(11 - i, a[i], "bounds"); + } + expectEquals(-127, a[127], "bounds127"); + // Abs. + abs(); + expectEquals(0, a[0], "abs0"); + for (int i = 1; i <= 11; i++) { + expectEquals(11 - i, a[i], "abs_lo"); + } + for (int i = 12; i < 127; i++) { + expectEquals(i - 11, a[i], "abs_hi"); + } + expectEquals(127, a[127], "abs127"); + // Conversion. + int[] b = new int[128]; + for (int i = 0; i < 128; i++) { + b[i] = 1000 * i; + } + conv(b); + for (int i = 1; i < 127; i++) { + expectEquals(1000.0f * i, a[i], "conv"); + } + // Done. + System.out.println("passed"); + } + + private static void expectEquals(float expected, float result, String action) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result + " for " + action); + } + } +} diff --git a/test/640-checker-int-simd/expected.txt b/test/640-checker-int-simd/expected.txt new file mode 100644 index 0000000000..b0aad4deb5 --- /dev/null +++ b/test/640-checker-int-simd/expected.txt @@ -0,0 +1 @@ +passed diff --git a/test/640-checker-int-simd/info.txt b/test/640-checker-int-simd/info.txt new file mode 100644 index 0000000000..c9c6d5ed9f --- /dev/null +++ b/test/640-checker-int-simd/info.txt @@ -0,0 +1 @@ +Functional tests on SIMD vectorization. diff --git a/test/640-checker-int-simd/src/Main.java b/test/640-checker-int-simd/src/Main.java new file mode 100644 index 0000000000..ba1e142668 --- /dev/null +++ b/test/640-checker-int-simd/src/Main.java @@ -0,0 +1,256 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Functional tests for SIMD vectorization. 
+ */ +public class Main { + + static int[] a; + + // + // Arithmetic operations. + // + + /// CHECK-START: void Main.add(int) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.add(int) loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecAdd loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none + static void add(int x) { + for (int i = 0; i < 128; i++) + a[i] += x; + } + + /// CHECK-START: void Main.sub(int) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.sub(int) loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecSub loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none + static void sub(int x) { + for (int i = 0; i < 128; i++) + a[i] -= x; + } + + /// CHECK-START: void Main.mul(int) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.mul(int) loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecMul loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none + static void mul(int x) { + for (int i = 0; i < 128; i++) + a[i] *= x; + } + + /// CHECK-START: void Main.div(int) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START: void Main.div(int) loop_optimization (after) + // + // Not supported on any architecture. 
+ // + static void div(int x) { + for (int i = 0; i < 128; i++) + a[i] /= x; + } + + /// CHECK-START: void Main.neg() loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.neg() loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecNeg loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none + static void neg() { + for (int i = 0; i < 128; i++) + a[i] = -a[i]; + } + + /// CHECK-START: void Main.not() loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.not() loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecNot loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none + static void not() { + for (int i = 0; i < 128; i++) + a[i] = ~a[i]; + } + + /// CHECK-START: void Main.shl4() loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.shl4() loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecShl loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none + static void shl4() { + for (int i = 0; i < 128; i++) + a[i] <<= 4; + } + + /// CHECK-START: void Main.sar2() loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.sar2() loop_optimization (after) + // + // TODO: fill in when supported + static void sar2() { + for (int i = 0; i < 128; i++) + a[i] >>= 2; + } + + /// CHECK-START: void Main.shr2() loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.shr2() loop_optimization (after) + // + // TODO: fill in when supported + static void shr2() { + for (int i = 0; i < 128; i++) + a[i] >>>= 2; + } + + // + // Shift sanity. + // + + static void shr32() { + for (int i = 0; i < 128; i++) + a[i] >>>= 32; // 0, since & 31 + } + + static void shr33() { + for (int i = 0; i < 128; i++) + a[i] >>>= 33; // 1, since & 31 + } + + // + // Loop bounds. + // + + static void bounds() { + for (int i = 1; i < 127; i++) + a[i] += 11; + } + + // + // Test Driver. + // + + public static void main(String[] args) { + // Set up. + a = new int[128]; + for (int i = 0; i < 128; i++) { + a[i] = i; + } + // Arithmetic operations. 
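Note: the shr32()/shr33() helpers above, exercised by the driver below, are sanity checks on Java's shift-distance masking (JLS 15.19): an int shift uses only the low five bits of the distance (a long shift the low six), so >>> 32 is a no-op and >>> 33 shifts by one:

    int x = -1;
    System.out.println(x >>> 32);           // -1, distance 32 & 31 == 0
    System.out.println(x >>> 33);           // 2147483647, distance 33 & 31 == 1
    System.out.println(-1L >>> 64 == -1L);  // true, long distances mask with & 63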
+ add(2); + for (int i = 0; i < 128; i++) { + expectEquals(i + 2, a[i], "add"); + } + sub(2); + for (int i = 0; i < 128; i++) { + expectEquals(i, a[i], "sub"); + } + mul(2); + for (int i = 0; i < 128; i++) { + expectEquals(i + i, a[i], "mul"); + } + div(2); + for (int i = 0; i < 128; i++) { + expectEquals(i, a[i], "div"); + } + neg(); + for (int i = 0; i < 128; i++) { + expectEquals(-i, a[i], "neg"); + } + // Loop bounds. + bounds(); + expectEquals(0, a[0], "bounds0"); + for (int i = 1; i < 127; i++) { + expectEquals(11 - i, a[i], "bounds"); + } + expectEquals(-127, a[127], "bounds127"); + // Shifts. + for (int i = 0; i < 128; i++) { + a[i] = 0xffffffff; + } + shl4(); + for (int i = 0; i < 128; i++) { + expectEquals(0xfffffff0, a[i], "shl4"); + } + sar2(); + for (int i = 0; i < 128; i++) { + expectEquals(0xfffffffc, a[i], "sar2"); + } + shr2(); + for (int i = 0; i < 128; i++) { + expectEquals(0x3fffffff, a[i], "shr2"); + } + shr32(); + for (int i = 0; i < 128; i++) { + expectEquals(0x3fffffff, a[i], "shr32"); + } + shr33(); + for (int i = 0; i < 128; i++) { + expectEquals(0x1fffffff, a[i], "shr33"); + } + not(); + for (int i = 0; i < 128; i++) { + expectEquals(0xe0000000, a[i], "not"); + } + // Done. + System.out.println("passed"); + } + + private static void expectEquals(int expected, int result, String action) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result + " for " + action); + } + } +} diff --git a/test/640-checker-long-simd/expected.txt b/test/640-checker-long-simd/expected.txt new file mode 100644 index 0000000000..b0aad4deb5 --- /dev/null +++ b/test/640-checker-long-simd/expected.txt @@ -0,0 +1 @@ +passed diff --git a/test/640-checker-long-simd/info.txt b/test/640-checker-long-simd/info.txt new file mode 100644 index 0000000000..c9c6d5ed9f --- /dev/null +++ b/test/640-checker-long-simd/info.txt @@ -0,0 +1 @@ +Functional tests on SIMD vectorization. diff --git a/test/640-checker-long-simd/src/Main.java b/test/640-checker-long-simd/src/Main.java new file mode 100644 index 0000000000..90a2e76538 --- /dev/null +++ b/test/640-checker-long-simd/src/Main.java @@ -0,0 +1,244 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Functional tests for SIMD vectorization. + */ +public class Main { + + static long[] a; + + // + // Arithmetic operations. 
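Note: with the 128-bit vectors targeted by this commit a long vector packs only two lanes, and the ARM64 checker blocks below are all left as TODO. Spill sizing, however, is already lane-agnostic: the ssa_liveness_analysis.cc hunk above divides the vector's byte size by the vreg size. A sketch of that arithmetic, with the 4-byte kVRegSize stated here as an assumption and spillSlots as an illustrative name:

    static int spillSlots(int lanes, int bytesPerLane) {
      final int kVRegSize = 4;  // assumed size of one spill slot in bytes
      return (lanes * bytesPerLane) / kVRegSize;
    }
    // spillSlots(2, 8)  == 4 for a 2 x long vector
    // spillSlots(16, 1) == 4 for a 16 x byte vector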
+ // + + /// CHECK-START: void Main.add(long) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.add(long) loop_optimization (after) + // + // TODO: fill in when supported + static void add(long x) { + for (int i = 0; i < 128; i++) + a[i] += x; + } + + /// CHECK-START: void Main.sub(long) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.sub(long) loop_optimization (after) + // + // TODO: fill in when supported + static void sub(long x) { + for (int i = 0; i < 128; i++) + a[i] -= x; + } + + /// CHECK-START: void Main.mul(long) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.mul(long) loop_optimization (after) + // + // TODO: fill in when supported + static void mul(long x) { + for (int i = 0; i < 128; i++) + a[i] *= x; + } + + /// CHECK-START: void Main.div(long) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START: void Main.div(long) loop_optimization (after) + // + // Not supported on any architecture. + // + static void div(long x) { + for (int i = 0; i < 128; i++) + a[i] /= x; + } + + /// CHECK-START: void Main.neg() loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.neg() loop_optimization (after) + // + // TODO: fill in when supported + static void neg() { + for (int i = 0; i < 128; i++) + a[i] = -a[i]; + } + + /// CHECK-START: void Main.not() loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.not() loop_optimization (after) + // + // TODO: fill in when supported + static void not() { + for (int i = 0; i < 128; i++) + a[i] = ~a[i]; + } + + /// CHECK-START: void Main.shl4() loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.shl4() loop_optimization (after) + // + // TODO: fill in when supported + static void shl4() { + for (int i = 0; i < 128; i++) + a[i] <<= 4; + } + + /// CHECK-START: void Main.sar2() loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.sar2() loop_optimization (after) + // + // TODO: fill in when supported + static void sar2() { + for (int i = 0; i < 128; i++) + a[i] >>= 2; + } + + /// CHECK-START: void Main.shr2() loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> 
outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.shr2() loop_optimization (after) + // + // TODO: fill in when supported + static void shr2() { + for (int i = 0; i < 128; i++) + a[i] >>>= 2; + } + + // + // Shift sanity. + // + + static void shr64() { + for (int i = 0; i < 128; i++) + a[i] >>>= 64; // 0, since & 63 + } + + static void shr65() { + for (int i = 0; i < 128; i++) + a[i] >>>= 65; // 1, since & 63 + } + + // + // Loop bounds. + // + + static void bounds() { + for (int i = 1; i < 127; i++) + a[i] += 11; + } + + // + // Test Driver. + // + + public static void main(String[] args) { + // Set up. + a = new long[128]; + for (int i = 0; i < 128; i++) { + a[i] = i; + } + // Arithmetic operations. + add(2L); + for (int i = 0; i < 128; i++) { + expectEquals(i + 2, a[i], "add"); + } + sub(2L); + for (int i = 0; i < 128; i++) { + expectEquals(i, a[i], "sub"); + } + mul(2L); + for (int i = 0; i < 128; i++) { + expectEquals(i + i, a[i], "mul"); + } + div(2L); + for (int i = 0; i < 128; i++) { + expectEquals(i, a[i], "div"); + } + neg(); + for (int i = 0; i < 128; i++) { + expectEquals(-i, a[i], "neg"); + } + // Loop bounds. + bounds(); + expectEquals(0, a[0], "bounds0"); + for (int i = 1; i < 127; i++) { + expectEquals(11 - i, a[i], "bounds"); + } + expectEquals(-127, a[127], "bounds127"); + // Shifts. + for (int i = 0; i < 128; i++) { + a[i] = 0xffffffffffffffffL; + } + shl4(); + for (int i = 0; i < 128; i++) { + expectEquals(0xfffffffffffffff0L, a[i], "shl4"); + } + sar2(); + for (int i = 0; i < 128; i++) { + expectEquals(0xfffffffffffffffcL, a[i], "sar2"); + } + shr2(); + for (int i = 0; i < 128; i++) { + expectEquals(0x3fffffffffffffffL, a[i], "shr2"); + } + shr64(); + for (int i = 0; i < 128; i++) { + expectEquals(0x3fffffffffffffffL, a[i], "shr64"); + } + shr65(); + for (int i = 0; i < 128; i++) { + expectEquals(0x1fffffffffffffffL, a[i], "shr65"); + } + not(); + for (int i = 0; i < 128; i++) { + expectEquals(0xe000000000000000L, a[i], "not"); + } + // Done. + System.out.println("passed"); + } + + private static void expectEquals(long expected, long result, String action) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result + " for " + action); + } + } +} diff --git a/test/640-checker-short-simd/expected.txt b/test/640-checker-short-simd/expected.txt new file mode 100644 index 0000000000..b0aad4deb5 --- /dev/null +++ b/test/640-checker-short-simd/expected.txt @@ -0,0 +1 @@ +passed diff --git a/test/640-checker-short-simd/info.txt b/test/640-checker-short-simd/info.txt new file mode 100644 index 0000000000..c9c6d5ed9f --- /dev/null +++ b/test/640-checker-short-simd/info.txt @@ -0,0 +1 @@ +Functional tests on SIMD vectorization. diff --git a/test/640-checker-short-simd/src/Main.java b/test/640-checker-short-simd/src/Main.java new file mode 100644 index 0000000000..241f8e6eea --- /dev/null +++ b/test/640-checker-short-simd/src/Main.java @@ -0,0 +1,277 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Functional tests for SIMD vectorization. + */ +public class Main { + + static short[] a; + + // + // Arithmetic operations. + // + + /// CHECK-START: void Main.add(int) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.add(int) loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecAdd loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none + static void add(int x) { + for (int i = 0; i < 128; i++) + a[i] += x; + } + + /// CHECK-START: void Main.sub(int) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.sub(int) loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecSub loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none + static void sub(int x) { + for (int i = 0; i < 128; i++) + a[i] -= x; + } + + /// CHECK-START: void Main.mul(int) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.mul(int) loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecMul loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none + static void mul(int x) { + for (int i = 0; i < 128; i++) + a[i] *= x; + } + + /// CHECK-START: void Main.div(int) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START: void Main.div(int) loop_optimization (after) + // + // Not supported on any architecture. 
+ // + static void div(int x) { + for (int i = 0; i < 128; i++) + a[i] /= x; + } + + /// CHECK-START: void Main.neg() loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.neg() loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecNeg loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none + static void neg() { + for (int i = 0; i < 128; i++) + a[i] = (short) -a[i]; + } + + /// CHECK-START: void Main.not() loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.not() loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecNot loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none + static void not() { + for (int i = 0; i < 128; i++) + a[i] = (short) ~a[i]; + } + + /// CHECK-START: void Main.shl4() loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.shl4() loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecShl loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none + static void shl4() { + for (int i = 0; i < 128; i++) + a[i] <<= 4; + } + + /// CHECK-START: void Main.sar2() loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.sar2() loop_optimization (after) + // + // TODO: fill in when supported + static void sar2() { + for (int i = 0; i < 128; i++) + a[i] >>= 2; + } + + /// CHECK-START: void Main.shr2() loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.shr2() loop_optimization (after) + // + // TODO: fill in when supported + static void shr2() { + for (int i = 0; i < 128; i++) + a[i] >>>= 2; + } + + // + // Shift sanity. + // + + static void sar31() { + for (int i = 0; i < 128; i++) + a[i] >>= 31; + } + + static void shr31() { + for (int i = 0; i < 128; i++) + a[i] >>>= 31; + } + + static void shr32() { + for (int i = 0; i < 128; i++) + a[i] >>>= 32; // 0, since & 31 + } + + static void shr33() { + for (int i = 0; i < 128; i++) + a[i] >>>= 33; // 1, since & 31 + } + + // + // Loop bounds. + // + + static void bounds() { + for (int i = 1; i < 127; i++) + a[i] += 11; + } + + // + // Test Driver. + // + + public static void main(String[] args) { + // Set up. + a = new short[128]; + for (int i = 0; i < 128; i++) { + a[i] = (short) i; + } + // Arithmetic operations.
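Note: the "sic!" on the shr2 check in the driver below marks the promote-shift-narrow effect already seen in the byte test: >>> on a short first sign-extends to int, and the zero bits shifted in at the top are discarded again by the implicit (short) narrowing:

    short s = (short) 0xfffc;     // -4, the value left by sar2()
    s >>>= 2;                     // 0xfffffffc >>> 2 is 0x3fffffff; (short) keeps 0xffff
    System.out.println(s == -1);  // true, so the check still reads 0xffff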
+ add(2); + for (int i = 0; i < 128; i++) { + expectEquals(i + 2, a[i], "add"); + } + sub(2); + for (int i = 0; i < 128; i++) { + expectEquals(i, a[i], "sub"); + } + mul(2); + for (int i = 0; i < 128; i++) { + expectEquals(i + i, a[i], "mul"); + } + div(2); + for (int i = 0; i < 128; i++) { + expectEquals(i, a[i], "div"); + } + neg(); + for (int i = 0; i < 128; i++) { + expectEquals(-i, a[i], "neg"); + } + // Loop bounds. + bounds(); + expectEquals(0, a[0], "bounds0"); + for (int i = 1; i < 127; i++) { + expectEquals(11 - i, a[i], "bounds"); + } + expectEquals(-127, a[127], "bounds127"); + // Shifts. + for (int i = 0; i < 128; i++) { + a[i] = (short) 0xffff; + } + shl4(); + for (int i = 0; i < 128; i++) { + expectEquals((short) 0xfff0, a[i], "shl4"); + } + sar2(); + for (int i = 0; i < 128; i++) { + expectEquals((short) 0xfffc, a[i], "sar2"); + } + shr2(); + for (int i = 0; i < 128; i++) { + expectEquals((short) 0xffff, a[i], "shr2"); // sic! + } + sar31(); + for (int i = 0; i < 128; i++) { + expectEquals((short) 0xffff, a[i], "sar31"); + } + shr31(); + for (int i = 0; i < 128; i++) { + expectEquals(0x0001, a[i], "shr31"); + a[i] = (short) 0x1200; // reset + } + shr32(); + for (int i = 0; i < 128; i++) { + expectEquals((short) 0x1200, a[i], "shr32"); + } + shr33(); + for (int i = 0; i < 128; i++) { + expectEquals((short) 0x0900, a[i], "shr33"); + a[i] = (short) 0xf0f1; // reset + } + not(); + for (int i = 0; i < 128; i++) { + expectEquals((short) 0x0f0e, a[i], "not"); + } + // Done. + System.out.println("passed"); + } + + private static void expectEquals(int expected, int result, String action) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result + " for " + action); + } + } +}