Revert "Emit vector mulitply and accumulate instructions for x86."
This reverts commit 61908880e6565acfadbafe93fa64de000014f1a6.
Reason for revert: fused multiply-add computes round(a * b + c) with a single
rounding, instead of the two separately rounded operations
round(round(a * b) + c) that Java's floating-point semantics require, so the
generated code can differ from the required result in the last bit.
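
A minimal sketch of the difference, for context (illustrative only; the class
name and input values are chosen for demonstration, and Math.fma merely stands
in for the fused vfmadd231pd path the reverted code emitted):

  public class FmaRounding {
      public static void main(String[] args) {
          double a = 1.0 + 0x1.0p-27;     // 1 + 2^-27
          double b = 1.0 + 0x1.0p-27;     // 1 + 2^-27
          double c = -(1.0 + 0x1.0p-26);  // -(1 + 2^-26)

          // Java semantics: the exact product 1 + 2^-26 + 2^-54 is first
          // rounded to 1 + 2^-26 (the 2^-54 term is below half an ulp),
          // so adding c yields exactly 0.0.
          System.out.println(a * b + c);          // 0.0

          // Fused semantics: a * b + c is computed exactly and rounded
          // once, preserving the 2^-54 term.
          System.out.println(Math.fma(a, b, c));  // 5.551115123125783E-17
      }
  }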
Change-Id: Ic0ef08691bef266c9f8d91973e596e09ff3307c6
diff --git a/compiler/Android.bp b/compiler/Android.bp
index 45dea1c..11521e6 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -161,7 +161,6 @@
"utils/x86/assembler_x86.cc",
"utils/x86/jni_macro_assembler_x86.cc",
"utils/x86/managed_register_x86.cc",
- "optimizing/instruction_simplifier_x86.cc",
],
},
x86_64: {
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index 5880876..086ae07 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -1125,59 +1125,13 @@
}
}
-void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
- LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr);
- switch (instr->GetPackedType()) {
- case DataType::Type::kFloat32:
- case DataType::Type::kFloat64:
- locations->SetInAt(
- HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister());
- locations->SetInAt(
- HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister());
- locations->SetInAt(
- HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister());
- DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0);
- locations->SetOut(Location::SameAsFirstInput());
- break;
- default:
- // VecMultiplyAccumulate is supported only for single and
- // double precision floating points. Hence integral types
- // are still not converted.
- LOG(FATAL) << "Unsupported SIMD Type";
- }
+void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+ CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
}
-void InstructionCodeGeneratorX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
- LocationSummary* locations = instr->GetLocations();
- DCHECK(locations->InAt(0).Equals(locations->Out()));
- XmmRegister accumulator = locations->InAt(
- HVecMultiplyAccumulate::kInputAccumulatorIndex).AsFpuRegister<XmmRegister>();
- XmmRegister mul_left = locations->InAt(
- HVecMultiplyAccumulate::kInputMulLeftIndex).AsFpuRegister<XmmRegister>();
- XmmRegister mul_right = locations->InAt(
- HVecMultiplyAccumulate::kInputMulRightIndex).AsFpuRegister<XmmRegister>();
- switch (instr->GetPackedType()) {
- case DataType::Type::kFloat32:
- DCHECK_EQ(4u, instr->GetVectorLength());
- if (instr->GetOpKind() == HInstruction::InstructionKind::kAdd)
- __ vfmadd231ps(accumulator, mul_left, mul_right);
- else
- __ vfmsub231ps(accumulator, mul_left, mul_right);
- break;
- case DataType::Type::kFloat64:
- DCHECK_EQ(2u, instr->GetVectorLength());
- if (instr->GetOpKind() == HInstruction::InstructionKind::kAdd)
- __ vfmadd231pd(accumulator, mul_left, mul_right);
- else
- __ vfmsub231pd(accumulator, mul_left, mul_right);
- break;
- default:
-
- // VecMultiplyAccumulate is supported only for single and
- // double precision floating points. Hence integral types
- // are still not converted.
- LOG(FATAL) << "Unsupported SIMD Type";
- }
+void InstructionCodeGeneratorX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+ // TODO: pmaddwd?
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
void LocationsBuilderX86::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index 4795e86..4d31ab6 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -1098,61 +1098,13 @@
}
}
-void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
- LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr);
- switch (instr->GetPackedType()) {
- case DataType::Type::kFloat32:
- case DataType::Type::kFloat64:
- locations->SetInAt(
- HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister());
- locations->SetInAt(
- HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister());
- locations->SetInAt(
- HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister());
- DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0);
- locations->SetOut(Location::SameAsFirstInput());
- break;
- default:
- // VecMultiplyAccumulate is supported only for single and
- // double precision floating points. Hence integral types
- // are still not converted.
- LOG(FATAL) << "Unsupported SIMD type";
- }
+void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+ CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
}
-
-void InstructionCodeGeneratorX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
- LocationSummary* locations = instr->GetLocations();
- DCHECK(locations->InAt(0).Equals(locations->Out()));
- XmmRegister accumulator = locations->InAt(
- HVecMultiplyAccumulate::kInputAccumulatorIndex).AsFpuRegister<XmmRegister>();
- XmmRegister mul_left = locations->InAt(
- HVecMultiplyAccumulate::kInputMulLeftIndex).AsFpuRegister<XmmRegister>();
- XmmRegister mul_right = locations->InAt(
- HVecMultiplyAccumulate::kInputMulRightIndex).AsFpuRegister<XmmRegister>();
-
- switch (instr->GetPackedType()) {
- case DataType::Type::kFloat32:
- DCHECK_EQ(4u, instr->GetVectorLength());
- if (instr->GetOpKind() == HInstruction::InstructionKind::kAdd)
- __ vfmadd231ps(accumulator, mul_left, mul_right);
- else
- __ vfmsub231ps(accumulator, mul_left, mul_right);
- break;
- case DataType::Type::kFloat64:
- DCHECK_EQ(2u, instr->GetVectorLength());
- if (instr->GetOpKind() == HInstruction::InstructionKind::kAdd)
- __ vfmadd231pd(accumulator, mul_left, mul_right);
- else
- __ vfmsub231pd(accumulator, mul_left, mul_right);
- break;
- default:
-
- // VecMultiplyAccumulate is supported only for single and
- // double precision floating points. Hence integral types
- // are still not converted.
- LOG(FATAL) << "Unsupported SIMD Type";
- }
+void InstructionCodeGeneratorX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+ // TODO: pmaddwd?
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
void LocationsBuilderX86_64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
diff --git a/compiler/optimizing/instruction_simplifier_x86.cc b/compiler/optimizing/instruction_simplifier_x86.cc
deleted file mode 100644
index b3f67d6..0000000
--- a/compiler/optimizing/instruction_simplifier_x86.cc
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- * Copyright (C) 2018 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "instruction_simplifier_x86.h"
-#include "arch/x86/instruction_set_features_x86.h"
-#include "mirror/array-inl.h"
-#include "code_generator.h"
-
-
-namespace art {
-
-namespace x86 {
-
-class InstructionSimplifierX86Visitor : public HGraphVisitor {
- public:
- InstructionSimplifierX86Visitor(HGraph* graph,
- CodeGeneratorX86 *codegen,
- OptimizingCompilerStats* stats)
- : HGraphVisitor(graph), codegen_(codegen), stats_(stats) {}
-
- private:
- void RecordSimplification() {
- MaybeRecordStat(stats_, MethodCompilationStat::kInstructionSimplificationsArch);
- }
-
- bool HasCpuFeatureFlag() {
- return (codegen_->GetInstructionSetFeatures().HasAVX2());
- }
-
- /**
- * This simplifier uses a special-purpose BB visitor.
- * (1) No need to visit Phi nodes.
- * (2) Since statements can be removed in a "forward" fashion,
- * the visitor should test if each statement is still there.
- */
- void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
- // TODO: fragile iteration, provide more robust iterators?
- for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
- HInstruction* instruction = it.Current();
- if (instruction->IsInBlock()) {
- instruction->Accept(this);
- }
- }
- }
-
- bool TryGenerateVecMultiplyAccumulate(HVecMul* mul);
- void VisitVecMul(HVecMul* instruction) OVERRIDE;
-
- CodeGeneratorX86* codegen_;
- OptimizingCompilerStats* stats_;
-};
-
-/* generic expressions for FMA
-a = (b * c) + a
-a = (b * c) – a
-*/
-bool InstructionSimplifierX86Visitor::TryGenerateVecMultiplyAccumulate(HVecMul* mul) {
- if (!(mul->GetPackedType() == DataType::Type::kFloat32 ||
- mul->GetPackedType() == DataType::Type::kFloat64)) {
- return false;
- }
- ArenaAllocator* allocator = mul->GetBlock()->GetGraph()->GetAllocator();
- if (mul->HasOnlyOneNonEnvironmentUse()) {
- HInstruction* use = mul->GetUses().front().GetUser();
- if (use->IsVecAdd() || use->IsVecSub()) {
- // Replace code looking like
- // VECMUL tmp, x, y
- // VECADD dst, acc, tmp or VECADD dst, tmp, acc
- // or
- // VECSUB dst, tmp, acc
- // with
- // VECMULACC dst, acc, x, y
-
- // Note that we do not want to (unconditionally) perform the merge when the
- // multiplication has multiple uses and it can be merged in all of them.
- // Multiple uses could happen on the same control-flow path, and we would
- // then increase the amount of work. In the future we could try to evaluate
- // whether all uses are on different control-flow paths (using dominance and
- // reverse-dominance information) and only perform the merge when they are.
- HInstruction* accumulator = nullptr;
- HVecBinaryOperation* binop = use->AsVecBinaryOperation();
- HInstruction* binop_left = binop->GetLeft();
- HInstruction* binop_right = binop->GetRight();
- DCHECK_NE(binop_left, binop_right);
- if (use->IsVecSub()) {
- if (binop_left == mul) {
- accumulator = binop_right;
- }
- } else {
- // VecAdd
- if (binop_right == mul) {
- accumulator = binop_left;
- } else {
- DCHECK_EQ(binop_left, mul);
- accumulator = binop_right;
- }
- }
- HInstruction::InstructionKind kind =
- use->IsVecAdd() ? HInstruction::kAdd : HInstruction::kSub;
-
- if (accumulator != nullptr) {
- HVecMultiplyAccumulate* mulacc =
- new (allocator) HVecMultiplyAccumulate(allocator,
- kind,
- accumulator,
- mul->GetLeft(),
- mul->GetRight(),
- binop->GetPackedType(),
- binop->GetVectorLength(),
- binop->GetDexPc());
- binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc);
- DCHECK(!mul->HasUses());
- mul->GetBlock()->RemoveInstruction(mul);
- return true;
- }
- }
- }
- return false;
-}
-
-void InstructionSimplifierX86Visitor::VisitVecMul(HVecMul* instruction) {
- if (HasCpuFeatureFlag()) {
- if (TryGenerateVecMultiplyAccumulate(instruction)) {
- RecordSimplification();
- }
- }
-}
-
-bool InstructionSimplifierX86::Run() {
- InstructionSimplifierX86Visitor visitor(graph_, codegen_, stats_);
- visitor.VisitReversePostOrder();
- return true;
-}
-
-} // namespace x86
-} // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_x86.h b/compiler/optimizing/instruction_simplifier_x86.h
deleted file mode 100644
index 1fb199f..0000000
--- a/compiler/optimizing/instruction_simplifier_x86.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (C) 2018 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_H_
-#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_H_
-
-#include "nodes.h"
-#include "optimization.h"
-#include "code_generator_x86.h"
-
-namespace art {
-namespace x86 {
-
-class InstructionSimplifierX86 : public HOptimization {
- public:
- InstructionSimplifierX86(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats)
- : HOptimization(graph, kInstructionSimplifierX86PassName, stats),
- codegen_(down_cast<CodeGeneratorX86*>(codegen)) {}
-
- static constexpr const char* kInstructionSimplifierX86PassName = "instruction_simplifier_x86";
-
- bool Run() OVERRIDE;
-
- private:
- CodeGeneratorX86* codegen_;
-};
-
-} // namespace x86
-} // namespace art
-
-#endif // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_H_
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index b4f9993..c5e9a8d 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -958,10 +958,6 @@
SetRawInputAt(2, mul_right);
}
- static constexpr int kInputAccumulatorIndex = 0;
- static constexpr int kInputMulLeftIndex = 1;
- static constexpr int kInputMulRightIndex = 2;
-
bool CanBeMoved() const OVERRIDE { return true; }
bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc
index 3ad2c6b..a38bd24 100644
--- a/compiler/optimizing/optimization.cc
+++ b/compiler/optimizing/optimization.cc
@@ -28,7 +28,6 @@
#endif
#ifdef ART_ENABLE_CODEGEN_x86
#include "pc_relative_fixups_x86.h"
-#include "instruction_simplifier_x86.h"
#endif
#if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64)
#include "x86_memory_gen.h"
@@ -122,8 +121,6 @@
#if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64)
case OptimizationPass::kX86MemoryOperandGeneration:
return x86::X86MemoryOperandGeneration::kX86MemoryOperandGenerationPassName;
- case OptimizationPass::kInstructionSimplifierX86:
- return x86::InstructionSimplifierX86::kInstructionSimplifierX86PassName;
#endif
case OptimizationPass::kNone:
LOG(FATAL) << "kNone does not represent an actual pass";
@@ -166,7 +163,6 @@
#ifdef ART_ENABLE_CODEGEN_x86
X(OptimizationPass::kPcRelativeFixupsX86);
X(OptimizationPass::kX86MemoryOperandGeneration);
- X(OptimizationPass::kInstructionSimplifierX86);
#endif
LOG(FATAL) << "Cannot find optimization " << pass_name;
UNREACHABLE();
@@ -327,10 +323,6 @@
DCHECK(alt_name == nullptr) << "arch-specific pass does not support alternative name";
opt = new (allocator) x86::X86MemoryOperandGeneration(graph, codegen, stats);
break;
- case OptimizationPass::kInstructionSimplifierX86:
- DCHECK(alt_name == nullptr) << "arch-specific pass does not support alternative name";
- opt = new (allocator) x86::InstructionSimplifierX86(graph, codegen, stats);
- break;
#endif
case OptimizationPass::kNone:
LOG(FATAL) << "kNone does not represent an actual pass";
diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h
index a9fafa0..88b283c 100644
--- a/compiler/optimizing/optimization.h
+++ b/compiler/optimizing/optimization.h
@@ -101,7 +101,6 @@
#endif
#if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64)
kX86MemoryOperandGeneration,
- kInstructionSimplifierX86,
#endif
kNone,
kLast = kNone
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index bb33ba3..84863e4 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -530,8 +530,7 @@
OptDef(OptimizationPass::kSideEffectsAnalysis),
OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"),
OptDef(OptimizationPass::kPcRelativeFixupsX86),
- OptDef(OptimizationPass::kX86MemoryOperandGeneration),
- OptDef(OptimizationPass::kInstructionSimplifierX86)
+ OptDef(OptimizationPass::kX86MemoryOperandGeneration)
};
return RunOptimizations(graph,
codegen,
@@ -546,8 +545,7 @@
OptimizationDef x86_64_optimizations[] = {
OptDef(OptimizationPass::kSideEffectsAnalysis),
OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"),
- OptDef(OptimizationPass::kX86MemoryOperandGeneration),
- OptDef(OptimizationPass::kInstructionSimplifierX86)
+ OptDef(OptimizationPass::kX86MemoryOperandGeneration)
};
return RunOptimizations(graph,
codegen,
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index c2ce03b..86f9010 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -525,58 +525,6 @@
EmitOperand(dst, src);
}
-void X86Assembler::vfmadd231ps(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) {
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/);
- uint8_t byte_one = EmitVexByte1(false, false, false, 2);
- uint8_t byte_two = EmitVexByte2(false, 128, X86ManagedRegister::FromXmmRegister(mul_left), 1);
- EmitUint8(byte_zero);
- EmitUint8(byte_one);
- EmitUint8(byte_two);
- // Opcode field.
- EmitUint8(0xB8);
- EmitXmmRegisterOperand(acc, mul_right);
-}
-
-void X86Assembler::vfmsub231ps(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) {
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/);
- uint8_t byte_one = EmitVexByte1(false, false, false, 2);
- uint8_t byte_two = EmitVexByte2(false, 128, X86ManagedRegister::FromXmmRegister(mul_left), 1);
- EmitUint8(byte_zero);
- EmitUint8(byte_one);
- EmitUint8(byte_two);
- // Opcode field.
- EmitUint8(0xBA);
- EmitXmmRegisterOperand(acc, mul_right);
-}
-
-void X86Assembler::vfmadd231pd(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) {
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/);
- uint8_t byte_one = EmitVexByte1(false, false, false, 2);
- uint8_t byte_two = EmitVexByte2(true, 128, X86ManagedRegister::FromXmmRegister(mul_left), 1);
- EmitUint8(byte_zero);
- EmitUint8(byte_one);
- EmitUint8(byte_two);
- // Opcode field.
- EmitUint8(0xB8);
- EmitXmmRegisterOperand(acc, mul_right);
-}
-
-void X86Assembler::vfmsub231pd(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) {
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/);
- uint8_t byte_one = EmitVexByte1(false, false, false, 2);
- uint8_t byte_two = EmitVexByte2(true, 128, X86ManagedRegister::FromXmmRegister(mul_left), 1);
- EmitUint8(byte_zero);
- EmitUint8(byte_one);
- EmitUint8(byte_two);
- // Opcode field.
- EmitUint8(0xBA);
- EmitXmmRegisterOperand(acc, mul_right);
-}
-
void X86Assembler::addps(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -2950,99 +2898,6 @@
}
-uint8_t X86Assembler::EmitVexByteZero(bool is_two_byte) {
- uint8_t vex_zero = 0xC0;
- if (!is_two_byte) {
- vex_zero |= 0xC4;
- } else {
- vex_zero |= 0xC5;
- }
- return vex_zero;
-}
-
-uint8_t X86Assembler::EmitVexByte1(bool r, bool x, bool b, int mmmmm ) {
- // VEX Byte 1.
- uint8_t vex_prefix = 0;
- if (!r) {
- vex_prefix |= 0x80; // VEX.R .
- }
- if (!x) {
- vex_prefix |= 0x40; // VEX.X .
- }
- if (!b) {
- vex_prefix |= 0x20; // VEX.B .
- }
-
- // VEX.mmmmm.
- switch (mmmmm) {
- case 1:
- // Implied 0F leading opcode byte.
- vex_prefix |= 0x01;
- break;
- case 2:
- // Implied leading 0F 38 opcode byte.
- vex_prefix |= 0x02;
- break;
- case 3:
- // Implied leading OF 3A opcode byte.
- vex_prefix |= 0x03;
- break;
- default:
- LOG(FATAL) << "unknown opcode bytes";
- }
- return vex_prefix;
-}
-
-uint8_t X86Assembler::EmitVexByte2(bool w, int l, X86ManagedRegister operand, int pp) {
- uint8_t vex_prefix = 0;
- // VEX Byte 2.
- if (w) {
- vex_prefix |= 0x80;
- }
-
- // VEX.vvvv.
- if (operand.IsXmmRegister()) {
- XmmRegister vvvv = operand.AsXmmRegister();
- int inverted_reg = 15-static_cast<int>(vvvv);
- uint8_t reg = static_cast<uint8_t>(inverted_reg);
- vex_prefix |= ((reg & 0x0F) << 3);
- } else if (operand.IsCpuRegister()) {
- Register vvvv = operand.AsCpuRegister();
- int inverted_reg = 15 - static_cast<int>(vvvv);
- uint8_t reg = static_cast<uint8_t>(inverted_reg);
- vex_prefix |= ((reg & 0x0F) << 3);
- }
-
- // VEX.L.
- if (l == 256) {
- vex_prefix |= 0x04;
- }
-
- // VEX.pp.
- switch (pp) {
- case 0:
- // SIMD Pefix - None.
- vex_prefix |= 0x00;
- break;
- case 1:
- // SIMD Prefix - 66.
- vex_prefix |= 0x01;
- break;
- case 2:
- // SIMD Prefix - F3.
- vex_prefix |= 0x02;
- break;
- case 3:
- // SIMD Prefix - F2.
- vex_prefix |= 0x03;
- break;
- default:
- LOG(FATAL) << "unknown SIMD Prefix";
- }
-
- return vex_prefix;
-}
-
void X86Assembler::EmitGenericShift(int reg_or_opcode,
const Operand& operand,
const Immediate& imm) {
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 8c9ce82..e42c4c9 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -397,12 +397,6 @@
void divss(XmmRegister dst, XmmRegister src);
void divss(XmmRegister dst, const Address& src);
- // FMA Mac Instructions
- void vfmadd231ps(XmmRegister dst, XmmRegister src1, XmmRegister src2);
- void vfmadd231pd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
- void vfmsub231ps(XmmRegister dst, XmmRegister src1, XmmRegister src2);
- void vfmsub231pd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
-
void addps(XmmRegister dst, XmmRegister src); // no addr variant (for now)
void subps(XmmRegister dst, XmmRegister src);
void mulps(XmmRegister dst, XmmRegister src);
@@ -840,11 +834,6 @@
void EmitLabelLink(Label* label);
void EmitLabelLink(NearLabel* label);
- // Emit a 3 byte VEX Prefix
- uint8_t EmitVexByteZero(bool is_two_byte);
- uint8_t EmitVexByte1(bool r, bool x, bool b, int mmmmm);
- uint8_t EmitVexByte2(bool w , int l , X86ManagedRegister vvv, int pp);
-
void EmitGenericShift(int rm, const Operand& operand, const Immediate& imm);
void EmitGenericShift(int rm, const Operand& operand, Register shifter);
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 9983eae..bd31561 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -603,56 +603,6 @@
}
-void X86_64Assembler::vfmadd231ps(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) {
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/);
- uint8_t byte_one = EmitVexByte1(acc.NeedsRex(), false, mul_right.NeedsRex(), 2);
- uint8_t byte_two = EmitVexByte2(false, 128, X86_64ManagedRegister::FromXmmRegister(mul_left.AsFloatRegister()), 1);
- EmitUint8(byte_zero);
- EmitUint8(byte_one);
- EmitUint8(byte_two);
- // Opcode field.
- EmitUint8(0xB8);
- EmitXmmRegisterOperand(acc.LowBits(), mul_right);
-}
-
-
-void X86_64Assembler::vfmsub231ps(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) {
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/);
- uint8_t byte_one = EmitVexByte1(acc.NeedsRex(), false, mul_right.NeedsRex(), 2);
- uint8_t byte_two = EmitVexByte2(false, 128, X86_64ManagedRegister::FromXmmRegister(mul_left.AsFloatRegister()), 1);
- EmitUint8(byte_zero);
- EmitUint8(byte_one);
- EmitUint8(byte_two);
- // Opcode field
- EmitUint8(0xBA);
- EmitXmmRegisterOperand(acc.LowBits(), mul_right);
-}
-
-void X86_64Assembler::vfmadd231pd(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) {
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/);
- uint8_t byte_one = EmitVexByte1(acc.NeedsRex(), false, mul_right.NeedsRex(), 2);
- uint8_t byte_two = EmitVexByte2(true, 128, X86_64ManagedRegister::FromXmmRegister(mul_left.AsFloatRegister()), 1);
- EmitUint8(byte_zero);
- EmitUint8(byte_one);
- EmitUint8(byte_two);
- EmitUint8(0xB8);
- EmitXmmRegisterOperand(acc.LowBits(), mul_right);
-}
-
-void X86_64Assembler::vfmsub231pd(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) {
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/);
- uint8_t byte_one = EmitVexByte1(acc.NeedsRex(), false, mul_right.NeedsRex(), 2);
- uint8_t byte_two = EmitVexByte2(true, 128, X86_64ManagedRegister::FromXmmRegister(mul_left.AsFloatRegister()), 1);
- EmitUint8(byte_zero);
- EmitUint8(byte_one);
- EmitUint8(byte_two);
- EmitUint8(0xBA);
- EmitXmmRegisterOperand(acc.LowBits(), mul_right);
-}
void X86_64Assembler::addps(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitOptionalRex32(dst, src);
@@ -3594,98 +3544,6 @@
label->LinkTo(position);
}
-uint8_t X86_64Assembler::EmitVexByteZero(bool is_two_byte) {
- uint8_t vex_zero = 0xC0;
- if (!is_two_byte) {
- vex_zero |= 0xC4;
- } else {
- vex_zero |= 0xC5;
- }
- return vex_zero;
-}
-
-uint8_t X86_64Assembler::EmitVexByte1(bool r, bool x, bool b, int mmmmm) {
- // VEX Byte 1.
- uint8_t vex_prefix = 0;
- if (!r) {
- vex_prefix |= 0x80; // VEX.R .
- }
- if (!x) {
- vex_prefix |= 0x40; // VEX.X .
- }
- if (!b) {
- vex_prefix |= 0x20; // VEX.B .
- }
-
- // VEX.mmmmm.
- switch (mmmmm) {
- case 1:
- // Implied 0F leading opcode byte.
- vex_prefix |= 0x01;
- break;
- case 2:
- // Implied leading 0F 38 opcode byte.
- vex_prefix |= 0x02;
- break;
- case 3:
- // Implied leading OF 3A opcode byte.
- vex_prefix |= 0x03;
- break;
- default:
- LOG(FATAL) << "unknown opcode bytes";
- }
-
- return vex_prefix;
-}
-
-uint8_t X86_64Assembler::EmitVexByte2(bool w, int l, X86_64ManagedRegister operand, int pp) {
- // VEX Byte 2.
- uint8_t vex_prefix = 0;
- if (w) {
- vex_prefix |= 0x80;
- }
- // VEX.vvvv.
- if (operand.IsXmmRegister()) {
- XmmRegister vvvv = operand.AsXmmRegister();
- int inverted_reg = 15-static_cast<int>(vvvv.AsFloatRegister());
- uint8_t reg = static_cast<uint8_t>(inverted_reg);
- vex_prefix |= ((reg & 0x0F) << 3);
- } else if (operand.IsCpuRegister()) {
- CpuRegister vvvv = operand.AsCpuRegister();
- int inverted_reg = 15 - static_cast<int>(vvvv.AsRegister());
- uint8_t reg = static_cast<uint8_t>(inverted_reg);
- vex_prefix |= ((reg & 0x0F) << 3);
- }
-
- // VEX.L.
- if (l == 256) {
- vex_prefix |= 0x04;
- }
-
- // VEX.pp.
- switch (pp) {
- case 0:
- // SIMD Pefix - None.
- vex_prefix |= 0x00;
- break;
- case 1:
- // SIMD Prefix - 66.
- vex_prefix |= 0x01;
- break;
- case 2:
- // SIMD Prefix - F3.
- vex_prefix |= 0x02;
- break;
- case 3:
- // SIMD Prefix - F2.
- vex_prefix |= 0x03;
- break;
- default:
- LOG(FATAL) << "unknown SIMD Prefix";
- }
-
- return vex_prefix;
-}
void X86_64Assembler::EmitGenericShift(bool wide,
int reg_or_opcode,
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index d5779aa..e4d72a7 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -436,16 +436,6 @@
void divss(XmmRegister dst, XmmRegister src);
void divss(XmmRegister dst, const Address& src);
- // Mac Instructions
- // For reference look at the Instruction reference volume 2C.
- // The below URL is broken down in two lines.
- // https://www.intel.com/content/www/us/en/architecture-and-technology/
- // 64-ia-32-architectures-software-developer-vol-2c-manual.html
- void vfmadd231ps(XmmRegister acc, XmmRegister left, XmmRegister right);
- void vfmadd231pd(XmmRegister acc, XmmRegister left, XmmRegister right);
- void vfmsub231ps(XmmRegister acc, XmmRegister left, XmmRegister right);
- void vfmsub231pd(XmmRegister acc, XmmRegister left, XmmRegister right);
-
void addps(XmmRegister dst, XmmRegister src); // no addr variant (for now)
void subps(XmmRegister dst, XmmRegister src);
void mulps(XmmRegister dst, XmmRegister src);
@@ -931,11 +921,6 @@
void EmitLabelLink(Label* label);
void EmitLabelLink(NearLabel* label);
- // Emit a 3 byte VEX Prefix.
- uint8_t EmitVexByteZero(bool is_two_byte);
- uint8_t EmitVexByte1(bool r, bool x, bool b, int mmmmm);
- uint8_t EmitVexByte2(bool w , int l , X86_64ManagedRegister operand, int pp);
-
void EmitGenericShift(bool wide, int rm, CpuRegister reg, const Immediate& imm);
void EmitGenericShift(bool wide, int rm, CpuRegister operand, CpuRegister shifter);