diff options
author | 2018-07-13 00:05:27 +0000 | |
---|---|---|
committer | 2018-07-13 00:05:27 +0000 | |
commit | f5f56c791c5853f43a2a9781c98d5776c7dd5a59 (patch) | |
tree | ed8270e3a5d0161ebe5bec0606a24cd5e3123e59 /compiler/optimizing | |
parent | 61908880e6565acfadbafe93fa64de000014f1a6 (diff) |
Revert "Emit vector mulitply and accumulate instructions for x86."
This reverts commit 61908880e6565acfadbafe93fa64de000014f1a6.
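Reason for revert: The fused multiply-add instructions emitted by the reverted change do not round the intermediate multiply result, so the generated code does not follow Java's floating-point rounding rules.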
Change-Id: Ic0ef08691bef266c9f8d91973e596e09ff3307c6
Diffstat (limited to 'compiler/optimizing')
-rw-r--r-- | compiler/optimizing/code_generator_vector_x86.cc | 56 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_vector_x86_64.cc | 58 | ||||
-rw-r--r-- | compiler/optimizing/instruction_simplifier_x86.cc | 149 | ||||
-rw-r--r-- | compiler/optimizing/instruction_simplifier_x86.h | 44 | ||||
-rw-r--r-- | compiler/optimizing/nodes_vector.h | 4 | ||||
-rw-r--r-- | compiler/optimizing/optimization.cc | 8 | ||||
-rw-r--r-- | compiler/optimizing/optimization.h | 1 | ||||
-rw-r--r-- | compiler/optimizing/optimizing_compiler.cc | 6 |
8 files changed, 12 insertions, 314 deletions
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc index 58808769e2..086ae07a06 100644 --- a/compiler/optimizing/code_generator_vector_x86.cc +++ b/compiler/optimizing/code_generator_vector_x86.cc @@ -1125,59 +1125,13 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in } } -void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr); - switch (instr->GetPackedType()) { - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt( - HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister()); - locations->SetInAt( - HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister()); - locations->SetInAt( - HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister()); - DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0); - locations->SetOut(Location::SameAsFirstInput()); - break; - default: - // VecMultiplyAccumulate is supported only for single and - // double precision floating points. Hence integral types - // are still not converted. 
- LOG(FATAL) << "Unsupported SIMD Type"; - } +void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { + CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction); } -void InstructionCodeGeneratorX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LocationSummary* locations = instr->GetLocations(); - DCHECK(locations->InAt(0).Equals(locations->Out())); - XmmRegister accumulator = locations->InAt( - HVecMultiplyAccumulate::kInputAccumulatorIndex).AsFpuRegister<XmmRegister>(); - XmmRegister mul_left = locations->InAt( - HVecMultiplyAccumulate::kInputMulLeftIndex).AsFpuRegister<XmmRegister>(); - XmmRegister mul_right = locations->InAt( - HVecMultiplyAccumulate::kInputMulRightIndex).AsFpuRegister<XmmRegister>(); - switch (instr->GetPackedType()) { - case DataType::Type::kFloat32: - DCHECK_EQ(4u, instr->GetVectorLength()); - if (instr->GetOpKind() == HInstruction::InstructionKind::kAdd) - __ vfmadd231ps(accumulator, mul_left, mul_right); - else - __ vfmsub231ps(accumulator, mul_left, mul_right); - break; - case DataType::Type::kFloat64: - DCHECK_EQ(2u, instr->GetVectorLength()); - if (instr->GetOpKind() == HInstruction::InstructionKind::kAdd) - __ vfmadd231pd(accumulator, mul_left, mul_right); - else - __ vfmsub231pd(accumulator, mul_left, mul_right); - break; - default: - - // VecMultiplyAccumulate is supported only for single and - // double precision floating points. Hence integral types - // are still not converted. - LOG(FATAL) << "Unsupported SIMD Type"; - } +void InstructionCodeGeneratorX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { + // TODO: pmaddwd? 
+ LOG(FATAL) << "No SIMD for " << instruction->GetId(); } void LocationsBuilderX86::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc index 4795e86933..4d31ab68d1 100644 --- a/compiler/optimizing/code_generator_vector_x86_64.cc +++ b/compiler/optimizing/code_generator_vector_x86_64.cc @@ -1098,61 +1098,13 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in } } -void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr); - switch (instr->GetPackedType()) { - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt( - HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister()); - locations->SetInAt( - HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister()); - locations->SetInAt( - HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister()); - DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0); - locations->SetOut(Location::SameAsFirstInput()); - break; - default: - // VecMultiplyAccumulate is supported only for single and - // double precision floating points. Hence integral types - // are still not converted. 
- LOG(FATAL) << "Unsupported SIMD type"; - } +void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { + CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction); } - -void InstructionCodeGeneratorX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LocationSummary* locations = instr->GetLocations(); - DCHECK(locations->InAt(0).Equals(locations->Out())); - XmmRegister accumulator = locations->InAt( - HVecMultiplyAccumulate::kInputAccumulatorIndex).AsFpuRegister<XmmRegister>(); - XmmRegister mul_left = locations->InAt( - HVecMultiplyAccumulate::kInputMulLeftIndex).AsFpuRegister<XmmRegister>(); - XmmRegister mul_right = locations->InAt( - HVecMultiplyAccumulate::kInputMulRightIndex).AsFpuRegister<XmmRegister>(); - - switch (instr->GetPackedType()) { - case DataType::Type::kFloat32: - DCHECK_EQ(4u, instr->GetVectorLength()); - if (instr->GetOpKind() == HInstruction::InstructionKind::kAdd) - __ vfmadd231ps(accumulator, mul_left, mul_right); - else - __ vfmsub231ps(accumulator, mul_left, mul_right); - break; - case DataType::Type::kFloat64: - DCHECK_EQ(2u, instr->GetVectorLength()); - if (instr->GetOpKind() == HInstruction::InstructionKind::kAdd) - __ vfmadd231pd(accumulator, mul_left, mul_right); - else - __ vfmsub231pd(accumulator, mul_left, mul_right); - break; - default: - - // VecMultiplyAccumulate is supported only for single and - // double precision floating points. Hence integral types - // are still not converted. - LOG(FATAL) << "Unsupported SIMD Type"; - } +void InstructionCodeGeneratorX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { + // TODO: pmaddwd? 
+ LOG(FATAL) << "No SIMD for " << instruction->GetId(); } void LocationsBuilderX86_64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { diff --git a/compiler/optimizing/instruction_simplifier_x86.cc b/compiler/optimizing/instruction_simplifier_x86.cc deleted file mode 100644 index b3f67d6e84..0000000000 --- a/compiler/optimizing/instruction_simplifier_x86.cc +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright (C) 2018 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "instruction_simplifier_x86.h" -#include "arch/x86/instruction_set_features_x86.h" -#include "mirror/array-inl.h" -#include "code_generator.h" - - -namespace art { - -namespace x86 { - -class InstructionSimplifierX86Visitor : public HGraphVisitor { - public: - InstructionSimplifierX86Visitor(HGraph* graph, - CodeGeneratorX86 *codegen, - OptimizingCompilerStats* stats) - : HGraphVisitor(graph), codegen_(codegen), stats_(stats) {} - - private: - void RecordSimplification() { - MaybeRecordStat(stats_, MethodCompilationStat::kInstructionSimplificationsArch); - } - - bool HasCpuFeatureFlag() { - return (codegen_->GetInstructionSetFeatures().HasAVX2()); - } - - /** - * This simplifier uses a special-purpose BB visitor. - * (1) No need to visit Phi nodes. - * (2) Since statements can be removed in a "forward" fashion, - * the visitor should test if each statement is still there. 
- */ - void VisitBasicBlock(HBasicBlock* block) OVERRIDE { - // TODO: fragile iteration, provide more robust iterators? - for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { - HInstruction* instruction = it.Current(); - if (instruction->IsInBlock()) { - instruction->Accept(this); - } - } - } - - bool TryGenerateVecMultiplyAccumulate(HVecMul* mul); - void VisitVecMul(HVecMul* instruction) OVERRIDE; - - CodeGeneratorX86* codegen_; - OptimizingCompilerStats* stats_; -}; - -/* generic expressions for FMA -a = (b * c) + a -a = (b * c) – a -*/ -bool InstructionSimplifierX86Visitor::TryGenerateVecMultiplyAccumulate(HVecMul* mul) { - if (!(mul->GetPackedType() == DataType::Type::kFloat32 || - mul->GetPackedType() == DataType::Type::kFloat64)) { - return false; - } - ArenaAllocator* allocator = mul->GetBlock()->GetGraph()->GetAllocator(); - if (mul->HasOnlyOneNonEnvironmentUse()) { - HInstruction* use = mul->GetUses().front().GetUser(); - if (use->IsVecAdd() || use->IsVecSub()) { - // Replace code looking like - // VECMUL tmp, x, y - // VECADD dst, acc, tmp or VECADD dst, tmp, acc - // or - // VECSUB dst, tmp, acc - // with - // VECMULACC dst, acc, x, y - - // Note that we do not want to (unconditionally) perform the merge when the - // multiplication has multiple uses and it can be merged in all of them. - // Multiple uses could happen on the same control-flow path, and we would - // then increase the amount of work. In the future we could try to evaluate - // whether all uses are on different control-flow paths (using dominance and - // reverse-dominance information) and only perform the merge when they are. 
- HInstruction* accumulator = nullptr; - HVecBinaryOperation* binop = use->AsVecBinaryOperation(); - HInstruction* binop_left = binop->GetLeft(); - HInstruction* binop_right = binop->GetRight(); - DCHECK_NE(binop_left, binop_right); - if (use->IsVecSub()) { - if (binop_left == mul) { - accumulator = binop_right; - } - } else { - // VecAdd - if (binop_right == mul) { - accumulator = binop_left; - } else { - DCHECK_EQ(binop_left, mul); - accumulator = binop_right; - } - } - HInstruction::InstructionKind kind = - use->IsVecAdd() ? HInstruction::kAdd : HInstruction::kSub; - - if (accumulator != nullptr) { - HVecMultiplyAccumulate* mulacc = - new (allocator) HVecMultiplyAccumulate(allocator, - kind, - accumulator, - mul->GetLeft(), - mul->GetRight(), - binop->GetPackedType(), - binop->GetVectorLength(), - binop->GetDexPc()); - binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc); - DCHECK(!mul->HasUses()); - mul->GetBlock()->RemoveInstruction(mul); - return true; - } - } - } - return false; -} - -void InstructionSimplifierX86Visitor::VisitVecMul(HVecMul* instruction) { - if (HasCpuFeatureFlag()) { - if (TryGenerateVecMultiplyAccumulate(instruction)) { - RecordSimplification(); - } - } -} - -bool InstructionSimplifierX86::Run() { - InstructionSimplifierX86Visitor visitor(graph_, codegen_, stats_); - visitor.VisitReversePostOrder(); - return true; -} - -} // namespace x86 -} // namespace art diff --git a/compiler/optimizing/instruction_simplifier_x86.h b/compiler/optimizing/instruction_simplifier_x86.h deleted file mode 100644 index 1fb199f728..0000000000 --- a/compiler/optimizing/instruction_simplifier_x86.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (C) 2018 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_H_ -#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_H_ - -#include "nodes.h" -#include "optimization.h" -#include "code_generator_x86.h" - -namespace art { -namespace x86 { - -class InstructionSimplifierX86 : public HOptimization { - public: - InstructionSimplifierX86(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats) - : HOptimization(graph, kInstructionSimplifierX86PassName, stats), - codegen_(down_cast<CodeGeneratorX86*>(codegen)) {} - - static constexpr const char* kInstructionSimplifierX86PassName = "instruction_simplifier_x86"; - - bool Run() OVERRIDE; - - private: - CodeGeneratorX86* codegen_; -}; - -} // namespace x86 -} // namespace art - -#endif // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_H_ diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h index b4f9993ad6..c5e9a8d036 100644 --- a/compiler/optimizing/nodes_vector.h +++ b/compiler/optimizing/nodes_vector.h @@ -958,10 +958,6 @@ class HVecMultiplyAccumulate FINAL : public HVecOperation { SetRawInputAt(2, mul_right); } - static constexpr int kInputAccumulatorIndex = 0; - static constexpr int kInputMulLeftIndex = 1; - static constexpr int kInputMulRightIndex = 2; - bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc index 3ad2c6b3f6..a38bd2464d 100644 --- a/compiler/optimizing/optimization.cc +++ 
b/compiler/optimizing/optimization.cc @@ -28,7 +28,6 @@ #endif #ifdef ART_ENABLE_CODEGEN_x86 #include "pc_relative_fixups_x86.h" -#include "instruction_simplifier_x86.h" #endif #if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64) #include "x86_memory_gen.h" @@ -122,8 +121,6 @@ const char* OptimizationPassName(OptimizationPass pass) { #if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64) case OptimizationPass::kX86MemoryOperandGeneration: return x86::X86MemoryOperandGeneration::kX86MemoryOperandGenerationPassName; - case OptimizationPass::kInstructionSimplifierX86: - return x86::InstructionSimplifierX86::kInstructionSimplifierX86PassName; #endif case OptimizationPass::kNone: LOG(FATAL) << "kNone does not represent an actual pass"; @@ -166,7 +163,6 @@ OptimizationPass OptimizationPassByName(const std::string& pass_name) { #ifdef ART_ENABLE_CODEGEN_x86 X(OptimizationPass::kPcRelativeFixupsX86); X(OptimizationPass::kX86MemoryOperandGeneration); - X(OptimizationPass::kInstructionSimplifierX86); #endif LOG(FATAL) << "Cannot find optimization " << pass_name; UNREACHABLE(); @@ -327,10 +323,6 @@ ArenaVector<HOptimization*> ConstructOptimizations( DCHECK(alt_name == nullptr) << "arch-specific pass does not support alternative name"; opt = new (allocator) x86::X86MemoryOperandGeneration(graph, codegen, stats); break; - case OptimizationPass::kInstructionSimplifierX86: - DCHECK(alt_name == nullptr) << "arch-specific pass does not support alternative name"; - opt = new (allocator) x86::InstructionSimplifierX86(graph, codegen, stats); - break; #endif case OptimizationPass::kNone: LOG(FATAL) << "kNone does not represent an actual pass"; diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h index a9fafa0864..88b283cebf 100644 --- a/compiler/optimizing/optimization.h +++ b/compiler/optimizing/optimization.h @@ -101,7 +101,6 @@ enum class OptimizationPass { #endif #if defined(ART_ENABLE_CODEGEN_x86) || 
defined(ART_ENABLE_CODEGEN_x86_64) kX86MemoryOperandGeneration, - kInstructionSimplifierX86, #endif kNone, kLast = kNone diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index bb33ba3564..84863e4357 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -530,8 +530,7 @@ bool OptimizingCompiler::RunArchOptimizations(HGraph* graph, OptDef(OptimizationPass::kSideEffectsAnalysis), OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), OptDef(OptimizationPass::kPcRelativeFixupsX86), - OptDef(OptimizationPass::kX86MemoryOperandGeneration), - OptDef(OptimizationPass::kInstructionSimplifierX86) + OptDef(OptimizationPass::kX86MemoryOperandGeneration) }; return RunOptimizations(graph, codegen, @@ -546,8 +545,7 @@ bool OptimizingCompiler::RunArchOptimizations(HGraph* graph, OptimizationDef x86_64_optimizations[] = { OptDef(OptimizationPass::kSideEffectsAnalysis), OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), - OptDef(OptimizationPass::kX86MemoryOperandGeneration), - OptDef(OptimizationPass::kInstructionSimplifierX86) + OptDef(OptimizationPass::kX86MemoryOperandGeneration) }; return RunOptimizations(graph, codegen, |