author Android Build Coastguard Worker <android-build-coastguard-worker@google.com> 2025-01-03 18:01:28 -0800
committer Android Build Coastguard Worker <android-build-coastguard-worker@google.com> 2025-01-03 18:01:28 -0800
commit  fc20f5dacc66c03d0634bb79bf9318e9f8467dcc (patch)
tree    1c0e3df6cff6dbf1a6e24302c7ffe8a4739ee68a /compiler
parent  ef26438a09c6c19d51be82555a2870b1bfe2d50f (diff)
parent  d34713915fa125c29c0db5500a533b7bc0f7167e (diff)
Snap for 12868993 from d34713915fa125c29c0db5500a533b7bc0f7167e to 25Q2-release
Change-Id: Ibd5023fa2ed3e2be8668fb9c81f31b21630d2a3d
Diffstat (limited to 'compiler')
-rw-r--r--  compiler/optimizing/intrinsics.h               |    2
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc        |    2
-rw-r--r--  compiler/optimizing/intrinsics_arm_vixl.cc     |    2
-rw-r--r--  compiler/optimizing/intrinsics_riscv64.cc      |    2
-rw-r--r--  compiler/optimizing/intrinsics_utils.h         |    2
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc       |    2
-rw-r--r--  compiler/optimizing/superblock_cloner.cc       |   64
-rw-r--r--  compiler/optimizing/superblock_cloner.h        |   50
-rw-r--r--  compiler/optimizing/superblock_cloner_test.cc  |   46
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64.cc      |  891
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64.h       |    7
11 files changed, 103 insertions, 967 deletions
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 7a27b2506b..4f164e10c6 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -270,7 +270,7 @@ class VarHandleOptimizations : public IntrinsicOptimizations {
INTRINSIC_OPTIMIZATION(DoNotIntrinsify, 0); // One of the checks is statically known to fail.
INTRINSIC_OPTIMIZATION(SkipObjectNullCheck, 1); // Not applicable for static fields.
- // Use known `VarHandle` from the boot image. To apply this optimization, the following
+ // Use known `VarHandle` from the boot/app image. To apply this optimization, the following
// `VarHandle` checks must pass based on static analysis:
// - `VarHandle` type check (must match the coordinate count),
// - access mode check,
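
Note: the INTRINSIC_OPTIMIZATION entries above pack one boolean per bit of a flags word that static analysis fills in and the code generators later query. A minimal standalone sketch of that idiom, with illustrative names rather than ART's real definitions:

#include <cstdint>

// Hypothetical flag holder; ART's IntrinsicOptimizations differs in detail.
class IntrinsicFlags {
 public:
  explicit IntrinsicFlags(uint32_t packed = 0) : packed_(packed) {}

#define INTRINSIC_OPTIMIZATION(Name, bit)                            \
  void Set##Name() { packed_ |= (1u << (bit)); }                     \
  bool Get##Name() const { return (packed_ & (1u << (bit))) != 0; }

  INTRINSIC_OPTIMIZATION(DoNotIntrinsify, 0)
  INTRINSIC_OPTIMIZATION(SkipObjectNullCheck, 1)
  INTRINSIC_OPTIMIZATION(UseKnownImageVarHandle, 2)
#undef INTRINSIC_OPTIMIZATION

 private:
  uint32_t packed_;
};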
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index d12b2f7981..a4463cb248 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -4863,7 +4863,7 @@ static void GenerateVarHandleTarget(HInvoke* invoke,
if (expected_coordinates_count <= 1u) {
if (VarHandleOptimizations(invoke).GetUseKnownImageVarHandle()) {
ScopedObjectAccess soa(Thread::Current());
- ArtField* target_field = GetBootImageVarHandleField(invoke);
+ ArtField* target_field = GetImageVarHandleField(invoke);
if (expected_coordinates_count == 0u) {
ObjPtr<mirror::Class> declaring_class = target_field->GetDeclaringClass();
if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(declaring_class)) {
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 04793b4f66..db7457a68b 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -4597,7 +4597,7 @@ static void GenerateVarHandleTarget(HInvoke* invoke,
if (expected_coordinates_count <= 1u) {
if (VarHandleOptimizations(invoke).GetUseKnownImageVarHandle()) {
ScopedObjectAccess soa(Thread::Current());
- ArtField* target_field = GetBootImageVarHandleField(invoke);
+ ArtField* target_field = GetImageVarHandleField(invoke);
if (expected_coordinates_count == 0u) {
ObjPtr<mirror::Class> declaring_class = target_field->GetDeclaringClass();
if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(declaring_class)) {
diff --git a/compiler/optimizing/intrinsics_riscv64.cc b/compiler/optimizing/intrinsics_riscv64.cc
index 32816b5f4b..65776236c1 100644
--- a/compiler/optimizing/intrinsics_riscv64.cc
+++ b/compiler/optimizing/intrinsics_riscv64.cc
@@ -3774,7 +3774,7 @@ static void GenerateVarHandleTarget(HInvoke* invoke,
if (expected_coordinates_count <= 1u) {
if (VarHandleOptimizations(invoke).GetUseKnownImageVarHandle()) {
ScopedObjectAccess soa(Thread::Current());
- ArtField* target_field = GetBootImageVarHandleField(invoke);
+ ArtField* target_field = GetImageVarHandleField(invoke);
if (expected_coordinates_count == 0u) {
ObjPtr<mirror::Class> declaring_class = target_field->GetDeclaringClass();
if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(declaring_class)) {
diff --git a/compiler/optimizing/intrinsics_utils.h b/compiler/optimizing/intrinsics_utils.h
index 6c08cea3f8..c2f32ae708 100644
--- a/compiler/optimizing/intrinsics_utils.h
+++ b/compiler/optimizing/intrinsics_utils.h
@@ -207,7 +207,7 @@ static inline DataType::Type GetVarHandleExpectedValueType(HInvoke* invoke,
}
}
-static inline ArtField* GetBootImageVarHandleField(HInvoke* invoke)
+static inline ArtField* GetImageVarHandleField(HInvoke* invoke)
REQUIRES_SHARED(Locks::mutator_lock_) {
DCHECK_LE(GetExpectedVarHandleCoordinatesCount(invoke), 1u);
DCHECK(VarHandleOptimizations(invoke).GetUseKnownImageVarHandle());
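
Note: the renamed GetImageVarHandleField is consumed identically by the backends above and below: the fast path is taken only when the coordinate count is at most one and static analysis set the UseKnownImageVarHandle flag, which the helper's DCHECKs re-assert. A simplified, hypothetical analogue of that guarded shape, with stand-in types rather than ART's:

#include <cstddef>

struct ArtFieldStub { bool declaring_class_in_boot_image; };
struct VarHandleOptsStub { bool use_known_image_var_handle; };

// Only consult the statically-known field when both preconditions hold;
// otherwise fall back to the generic runtime resolution path.
const ArtFieldStub* ResolveTarget(const VarHandleOptsStub& opts,
                                  size_t expected_coordinates_count,
                                  const ArtFieldStub* known_field) {
  if (expected_coordinates_count <= 1u && opts.use_known_image_var_handle) {
    return known_field;  // Fast path: field known at compile time.
  }
  return nullptr;  // Generic path.
}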
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index fd74f3f44d..18963bf135 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -4074,7 +4074,7 @@ static void GenerateVarHandleTarget(HInvoke* invoke,
if (expected_coordinates_count <= 1u) {
if (VarHandleOptimizations(invoke).GetUseKnownImageVarHandle()) {
ScopedObjectAccess soa(Thread::Current());
- ArtField* target_field = GetBootImageVarHandleField(invoke);
+ ArtField* target_field = GetImageVarHandleField(invoke);
if (expected_coordinates_count == 0u) {
ObjPtr<mirror::Class> declaring_class = target_field->GetDeclaringClass();
__ movl(CpuRegister(target.object),
diff --git a/compiler/optimizing/superblock_cloner.cc b/compiler/optimizing/superblock_cloner.cc
index 276d2246cb..a7328a1218 100644
--- a/compiler/optimizing/superblock_cloner.cc
+++ b/compiler/optimizing/superblock_cloner.cc
@@ -227,40 +227,6 @@ void SuperblockCloner::RemapCopyInternalEdge(HBasicBlock* orig_block,
}
}
-bool SuperblockCloner::IsRemapInfoForVersioning() const {
- return remap_incoming_->empty() &&
- remap_orig_internal_->empty() &&
- remap_copy_internal_->empty();
-}
-
-void SuperblockCloner::CopyIncomingEdgesForVersioning() {
- for (uint32_t orig_block_id : orig_bb_set_.Indexes()) {
- HBasicBlock* orig_block = GetBlockById(orig_block_id);
- size_t incoming_edge_count = 0;
- for (HBasicBlock* orig_pred : orig_block->GetPredecessors()) {
- uint32_t orig_pred_id = orig_pred->GetBlockId();
- if (IsInOrigBBSet(orig_pred_id)) {
- continue;
- }
-
- HBasicBlock* copy_block = GetBlockCopy(orig_block);
- // This corresponds to the requirement on the order of predecessors: all the incoming
- // edges must be seen before the internal ones. This is always true for natural loops.
- // TODO: remove this requirement.
- DCHECK_EQ(orig_block->GetPredecessorIndexOf(orig_pred), incoming_edge_count);
- for (HInstructionIterator it(orig_block->GetPhis()); !it.Done(); it.Advance()) {
- HPhi* orig_phi = it.Current()->AsPhi();
- HPhi* copy_phi = GetInstrCopy(orig_phi)->AsPhi();
- HInstruction* orig_phi_input = orig_phi->InputAt(incoming_edge_count);
- // Add the corresponding input of the original phi to the copy one.
- copy_phi->AddInput(orig_phi_input);
- }
- copy_block->AddPredecessor(orig_pred);
- incoming_edge_count++;
- }
- }
-}
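
Note: for reference, the deleted CopyIncomingEdgesForVersioning duplicated each incoming edge (X, Y) of the region as (X, copy(Y)) and extended the copy phis with the matching original inputs. A standalone sketch of that idea on hypothetical simplified CFG types (the real code operates on HBasicBlock/HPhi and additionally asserts the predecessor ordering noted in the comment above):

#include <set>
#include <vector>

struct Phi { std::vector<int> inputs; };
struct Block {
  int id;
  std::vector<Block*> preds;
  std::vector<Phi> phis;
};

// Assumes `copy` starts with one input-less phi per original phi, as the
// cloner guarantees at this stage.
void DuplicateIncomingEdges(const std::set<int>& region, Block* orig, Block* copy) {
  for (size_t i = 0; i < orig->preds.size(); ++i) {
    Block* pred = orig->preds[i];
    if (region.count(pred->id) != 0) {
      continue;  // Internal edge: not an entry into the region.
    }
    for (size_t p = 0; p < orig->phis.size(); ++p) {
      copy->phis[p].inputs.push_back(orig->phis[p].inputs[i]);
    }
    copy->preds.push_back(pred);
  }
}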
-
//
// Local versions of CF calculation/adjustment routines.
//
@@ -484,12 +450,6 @@ void SuperblockCloner::FindAndSetLocalAreaForAdjustments() {
}
void SuperblockCloner::RemapEdgesSuccessors() {
- // By this stage all the blocks have been copied, copy phis - created with no inputs;
- // no copy edges have been created so far.
- if (IsRemapInfoForVersioning()) {
- CopyIncomingEdgesForVersioning();
- }
-
// Redirect incoming edges.
for (HEdge e : *remap_incoming_) {
HBasicBlock* orig_block = GetBlockById(e.GetFrom());
@@ -897,7 +857,7 @@ bool SuperblockCloner::IsSubgraphClonable() const {
return true;
}
-// Checks that loop unrolling/peeling/versioning is being conducted.
+// Checks that loop unrolling/peeling is being conducted.
bool SuperblockCloner::IsFastCase() const {
// Check that all the basic blocks belong to the same loop.
bool flag = false;
@@ -914,15 +874,11 @@ bool SuperblockCloner::IsFastCase() const {
}
}
- // Check that orig_bb_set_ corresponds to loop peeling/unrolling/versioning.
+ // Check that orig_bb_set_ corresponds to loop peeling/unrolling.
if (common_loop_info == nullptr || !orig_bb_set_.SameBitsSet(&common_loop_info->GetBlocks())) {
return false;
}
- if (IsRemapInfoForVersioning()) {
- return true;
- }
-
bool peeling_or_unrolling = false;
HEdgeSet remap_orig_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
HEdgeSet remap_copy_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
@@ -1171,9 +1127,6 @@ HBasicBlock* LoopClonerHelper::DoLoopTransformationImpl(TransformationKind trans
case TransformationKind::kUnrolling:
oss << "unrolling";
break;
- case TransformationKind::kVersioning:
- oss << "versioning";
- break;
}
oss << " was applied to the loop <" << loop_header->GetBlockId() << ">.";
LOG(INFO) << oss.str();
@@ -1185,14 +1138,11 @@ HBasicBlock* LoopClonerHelper::DoLoopTransformationImpl(TransformationKind trans
HEdgeSet remap_copy_internal(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
HEdgeSet remap_incoming(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
- // No remapping needed for loop versioning.
- if (transformation != TransformationKind::kVersioning) {
- CollectRemappingInfoForPeelUnroll(transformation == TransformationKind::kUnrolling,
- loop_info_,
- &remap_orig_internal,
- &remap_copy_internal,
- &remap_incoming);
- }
+ CollectRemappingInfoForPeelUnroll(transformation == TransformationKind::kUnrolling,
+ loop_info_,
+ &remap_orig_internal,
+ &remap_copy_internal,
+ &remap_incoming);
cloner_.SetSuccessorRemappingInfo(&remap_orig_internal, &remap_copy_internal, &remap_incoming);
cloner_.Run();
diff --git a/compiler/optimizing/superblock_cloner.h b/compiler/optimizing/superblock_cloner.h
index c327867342..d4db0b3852 100644
--- a/compiler/optimizing/superblock_cloner.h
+++ b/compiler/optimizing/superblock_cloner.h
@@ -90,8 +90,7 @@ inline bool IsEdgeValid(HEdge edge, HGraph* graph) {
// fine grain manipulation with IR; data flow and graph properties are resolved/adjusted
// automatically. The clone transformation is defined by specifying a set of basic blocks to copy
// and a set of rules how to treat edges, remap their successors. By using this approach such
-// optimizations as Branch Target Expansion, Loop Peeling, Loop Unrolling, Loop Versioning can be
-// implemented.
+// optimizations as Branch Target Expansion, Loop Peeling, Loop Unrolling can be implemented.
//
// The idea of the transformation is based on "Superblock cloning" technique described in the book
// "Engineering a Compiler. Second Edition", Keith D. Cooper, Linda Torczon, Rice University
@@ -163,7 +162,7 @@ class SuperblockCloner : public ValueObject {
//
// TODO: formally describe the criteria.
//
- // Loop peeling, unrolling and versioning satisfy the criteria.
+ // Loop peeling and unrolling satisfy the criteria.
bool IsFastCase() const;
// Runs the copy algorithm according to the description.
@@ -299,18 +298,6 @@ class SuperblockCloner : public ValueObject {
// Remaps copy internal edge to its origin, adjusts the phi inputs in orig_succ.
void RemapCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
- // Checks whether the edges remapping info corresponds to the subgraph versioning case:
- // - none of the incoming edges are to be remapped (they are being duplicated).
- // - none of the internal edges are to be remapped.
- bool IsRemapInfoForVersioning() const;
-
- // Processes incoming edges for subgraph versioning case: for each incoming edge (X, Y) adds
- // an edge (X, Y_1) where Y_1 = Copy(Y) and add corresponding phi input to copy phi.
- //
- // Note: such node X will now have two successors, its unconditional branch instruction
- // will be invalid and should be adjusted to some conditional branch by the client code.
- void CopyIncomingEdgesForVersioning();
-
//
// Local versions of control flow calculation/adjustment routines.
//
@@ -376,7 +363,7 @@ class SuperblockCloner : public ValueObject {
DISALLOW_COPY_AND_ASSIGN(SuperblockCloner);
};
-// Helper class to perform loop peeling/unrolling/versioning.
+// Helper class to perform loop peeling/unrolling.
//
// This helper should be used when correspondence map between original and copied
// basic blocks/instructions are demanded.
@@ -456,40 +443,12 @@ class LoopClonerHelper : public ValueObject {
return DoLoopTransformationImpl(TransformationKind::kUnrolling);
}
- // Perform loop versioning.
- //
- // Control flow of an example (ignoring critical edges splitting).
- //
- // Before After
- //
- // |B| |B|
- // | |
- // v v
- // |1| |1|_________
- // | | |
- // v v v
- // |2|<-\ |2|<-\ |2A|<-\
- // / \ / / \ / / \ /
- // v v/ | v/ | v/
- // | |3| | |3| | |3A|
- // | | __________|
- // | ||
- // v vv
- // |4| |4|
- // | |
- // v v
- // |E| |E|
- HBasicBlock* DoVersioning() {
- return DoLoopTransformationImpl(TransformationKind::kVersioning);
- }
-
HLoopInformation* GetRegionToBeAdjusted() const { return cloner_.GetRegionToBeAdjusted(); }
protected:
enum class TransformationKind {
kPeeling,
kUnrolling,
- kVersioning,
};
// Applies a specific loop transformation to the loop.
@@ -502,7 +461,7 @@ class LoopClonerHelper : public ValueObject {
DISALLOW_COPY_AND_ASSIGN(LoopClonerHelper);
};
-// Helper class to perform loop peeling/unrolling/versioning.
+// Helper class to perform loop peeling/unrolling.
//
// This helper should be used when there is no need to get correspondence information between
// original and copied basic blocks/instructions.
@@ -512,7 +471,6 @@ class LoopClonerSimpleHelper : public ValueObject {
bool IsLoopClonable() const { return helper_.IsLoopClonable(); }
HBasicBlock* DoPeeling() { return helper_.DoPeeling(); }
HBasicBlock* DoUnrolling() { return helper_.DoUnrolling(); }
- HBasicBlock* DoVersioning() { return helper_.DoVersioning(); }
HLoopInformation* GetRegionToBeAdjusted() const { return helper_.GetRegionToBeAdjusted(); }
const SuperblockCloner::HBasicBlockMap* GetBasicBlockMap() const { return &bb_map_; }
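
Note: a typical use of the simple helper, per the declarations above. The constructor arguments shown are an assumption (mirroring LoopClonerHelper's, minus the maps it owns internally), so treat this as a sketch rather than the exact API:

LoopClonerSimpleHelper helper(loop_info, /*induction_range=*/ nullptr);
if (helper.IsLoopClonable()) {
  HBasicBlock* new_header = helper.DoPeeling();
  // Correspondence data remains reachable even with the simple helper:
  const SuperblockCloner::HBasicBlockMap* bb_map = helper.GetBasicBlockMap();
}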
diff --git a/compiler/optimizing/superblock_cloner_test.cc b/compiler/optimizing/superblock_cloner_test.cc
index 5190dae033..1bef8a4e9d 100644
--- a/compiler/optimizing/superblock_cloner_test.cc
+++ b/compiler/optimizing/superblock_cloner_test.cc
@@ -301,52 +301,6 @@ TEST_F(SuperblockClonerTest, LoopUnrolling) {
EXPECT_EQ(loop_info->GetBackEdges()[0], bb_map.Get(loop_body));
}
-// Tests SuperblockCloner for loop versioning case.
-//
-// See an ASCII graphics example near LoopClonerHelper::DoVersioning.
-TEST_F(SuperblockClonerTest, LoopVersioning) {
- HBasicBlock* return_block = InitGraphAndParameters();
- auto [preheader, header, loop_body] = CreateWhileLoop(return_block);
- CreateBasicLoopDataFlow(header, loop_body);
- graph_->BuildDominatorTree();
- EXPECT_TRUE(CheckGraph());
-
- HBasicBlockMap bb_map(
- std::less<HBasicBlock*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
- HInstructionMap hir_map(
- std::less<HInstruction*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
-
- HLoopInformation* loop_info = header->GetLoopInformation();
- HBasicBlock* original_preheader = loop_info->GetPreHeader();
- LoopClonerHelper helper(loop_info, &bb_map, &hir_map, /* induction_range= */ nullptr);
- EXPECT_TRUE(helper.IsLoopClonable());
- HBasicBlock* new_header = helper.DoVersioning();
- EXPECT_EQ(header, new_header);
-
- EXPECT_TRUE(CheckGraph());
-
- HBasicBlock* second_header = bb_map.Get(header);
- HBasicBlock* second_body = bb_map.Get(loop_body);
- HLoopInformation* second_loop_info = second_header->GetLoopInformation();
-
- // Check loop body successors.
- EXPECT_EQ(loop_body->GetSingleSuccessor(), header);
- EXPECT_EQ(second_body->GetSingleSuccessor(), second_header);
-
- // Check loop structure.
- EXPECT_EQ(loop_info, header->GetLoopInformation());
- EXPECT_EQ(loop_info->GetHeader(), header);
- EXPECT_EQ(second_loop_info->GetHeader(), second_header);
-
- EXPECT_EQ(loop_info->GetBackEdges().size(), 1u);
- EXPECT_EQ(second_loop_info->GetBackEdges().size(), 1u);
-
- EXPECT_EQ(loop_info->GetBackEdges()[0], loop_body);
- EXPECT_EQ(second_loop_info->GetBackEdges()[0], second_body);
-
- EXPECT_EQ(original_preheader->GetSuccessors().size(), 2u);
-}
-
// Checks that loop unrolling works fine for a loop with multiple back edges. Tests that after
// the transformation the loop has a single preheader.
TEST_F(SuperblockClonerTest, LoopPeelingMultipleBackEdges) {
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 959e291dc9..6330cc5d62 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -736,59 +736,12 @@ void X86_64Assembler::subps(XmmRegister dst, XmmRegister src) {
}
void X86_64Assembler::vaddps(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
- DCHECK(CpuHasAVXorAVX2FeatureFlag());
- bool is_twobyte_form = false;
- uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
- if (!add_right.NeedsRex()) {
- is_twobyte_form = true;
- } else if (!add_left.NeedsRex()) {
- return vaddps(dst, add_right, add_left);
- }
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- X86_64ManagedRegister vvvv_reg =
- X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
- ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
- if (is_twobyte_form) {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
- } else {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
- /*X=*/ false,
- add_right.NeedsRex(),
- SET_VEX_M_0F);
- ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
- }
- EmitUint8(ByteZero);
- EmitUint8(ByteOne);
- if (!is_twobyte_form) {
- EmitUint8(ByteTwo);
- }
- EmitUint8(0x58);
- EmitXmmRegisterOperand(dst.LowBits(), add_right);
+ EmitVecArithAndLogicalOperation(
+ dst, add_left, add_right, /*opcode=*/ 0x58, SET_VEX_PP_NONE, /*is_commutative=*/ true);
}
void X86_64Assembler::vsubps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
- DCHECK(CpuHasAVXorAVX2FeatureFlag());
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- bool is_twobyte_form = false;
- uint8_t byte_zero = 0x00, byte_one = 0x00, byte_two = 0x00;
- if (!src2.NeedsRex()) {
- is_twobyte_form = true;
- }
- byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
- X86_64ManagedRegister vvvv_reg = X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
- if (is_twobyte_form) {
- byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
- } else {
- byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), /*X=*/ false, src2.NeedsRex(), SET_VEX_M_0F);
- byte_two = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
- }
- EmitUint8(byte_zero);
- EmitUint8(byte_one);
- if (!is_twobyte_form) {
- EmitUint8(byte_two);
- }
- EmitUint8(0x5C);
- EmitXmmRegisterOperand(dst.LowBits(), src2);
+ EmitVecArithAndLogicalOperation(dst, src1, src2, /*opcode=*/ 0x5C, SET_VEX_PP_NONE);
}
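
Note: all of the rewritten bodies funnel into EmitVecArithAndLogicalOperation (added at the end of this file), which keeps the pre-existing size trick: for a commutative operation whose second source needs the REX extension bit while the first does not, the sources are swapped so the shorter two-byte VEX prefix still applies. A minimal standalone illustration of the swap rule, with a hypothetical NeedsRex stand-in:

#include <utility>

// Stand-in: xmm8..xmm15 require the VEX/REX extension bit.
bool NeedsRex(int xmm) { return xmm >= 8; }

// Order the sources of a commutative op so src2 avoids the extension bit
// when possible; that is what enables the two-byte VEX form.
void OrderCommutativeSources(int& src1, int& src2) {
  if (NeedsRex(src2) && !NeedsRex(src1)) {
    std::swap(src1, src2);  // e.g. (xmm2, xmm9) becomes (xmm9, xmm2).
  }
}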
@@ -801,34 +754,8 @@ void X86_64Assembler::mulps(XmmRegister dst, XmmRegister src) {
}
void X86_64Assembler::vmulps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
- DCHECK(CpuHasAVXorAVX2FeatureFlag());
- bool is_twobyte_form = false;
- uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
- if (!src2.NeedsRex()) {
- is_twobyte_form = true;
- } else if (!src1.NeedsRex()) {
- return vmulps(dst, src2, src1);
- }
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
- X86_64ManagedRegister vvvv_reg =
- X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
- if (is_twobyte_form) {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
- } else {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
- /*X=*/ false,
- src2.NeedsRex(),
- SET_VEX_M_0F);
- ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
- }
- EmitUint8(ByteZero);
- EmitUint8(ByteOne);
- if (!is_twobyte_form) {
- EmitUint8(ByteTwo);
- }
- EmitUint8(0x59);
- EmitXmmRegisterOperand(dst.LowBits(), src2);
+ EmitVecArithAndLogicalOperation(
+ dst, src1, src2, /*opcode=*/ 0x59, SET_VEX_PP_NONE, /*is_commutative=*/ true);
}
void X86_64Assembler::divps(XmmRegister dst, XmmRegister src) {
@@ -840,32 +767,7 @@ void X86_64Assembler::divps(XmmRegister dst, XmmRegister src) {
}
void X86_64Assembler::vdivps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
- DCHECK(CpuHasAVXorAVX2FeatureFlag());
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- bool is_twobyte_form = false;
- uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
- if (!src2.NeedsRex()) {
- is_twobyte_form = true;
- }
- ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
- X86_64ManagedRegister vvvv_reg =
- X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
- if (is_twobyte_form) {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
- } else {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
- /*X=*/ false,
- src2.NeedsRex(),
- SET_VEX_M_0F);
- ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
- }
- EmitUint8(ByteZero);
- EmitUint8(ByteOne);
- if (!is_twobyte_form) {
- EmitUint8(ByteTwo);
- }
- EmitUint8(0x5E);
- EmitXmmRegisterOperand(dst.LowBits(), src2);
+ EmitVecArithAndLogicalOperation(dst, src1, src2, /*opcode=*/ 0x5E, SET_VEX_PP_NONE);
}
void X86_64Assembler::vfmadd213ss(XmmRegister acc, XmmRegister left, XmmRegister right) {
@@ -1209,33 +1111,8 @@ void X86_64Assembler::addpd(XmmRegister dst, XmmRegister src) {
void X86_64Assembler::vaddpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
- bool is_twobyte_form = false;
- uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
- if (!add_right.NeedsRex()) {
- is_twobyte_form = true;
- } else if (!add_left.NeedsRex()) {
- return vaddpd(dst, add_right, add_left);
- }
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
- X86_64ManagedRegister vvvv_reg =
- X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
- if (is_twobyte_form) {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- } else {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
- /*X=*/ false,
- add_right.NeedsRex(),
- SET_VEX_M_0F);
- ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- }
- EmitUint8(ByteZero);
- EmitUint8(ByteOne);
- if (!is_twobyte_form) {
- EmitUint8(ByteTwo);
- }
- EmitUint8(0x58);
- EmitXmmRegisterOperand(dst.LowBits(), add_right);
+ EmitVecArithAndLogicalOperation(
+ dst, add_left, add_right, /*opcode=*/ 0x58, SET_VEX_PP_66, /*is_commutative=*/ true);
}
@@ -1250,31 +1127,7 @@ void X86_64Assembler::subpd(XmmRegister dst, XmmRegister src) {
void X86_64Assembler::vsubpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- bool is_twobyte_form = false;
- uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
- if (!src2.NeedsRex()) {
- is_twobyte_form = true;
- }
- ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
- X86_64ManagedRegister vvvv_reg =
- X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
- if (is_twobyte_form) {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- } else {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
- /*X=*/ false,
- src2.NeedsRex(),
- SET_VEX_M_0F);
- ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- }
- EmitUint8(ByteZero);
- EmitUint8(ByteOne);
- if (!is_twobyte_form) {
- EmitUint8(ByteTwo);
- }
- EmitUint8(0x5C);
- EmitXmmRegisterOperand(dst.LowBits(), src2);
+ EmitVecArithAndLogicalOperation(dst, src1, src2, /*opcode=*/ 0x5C, SET_VEX_PP_66);
}
@@ -1288,34 +1141,8 @@ void X86_64Assembler::mulpd(XmmRegister dst, XmmRegister src) {
}
void X86_64Assembler::vmulpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
- DCHECK(CpuHasAVXorAVX2FeatureFlag());
- bool is_twobyte_form = false;
- uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
- if (!src2.NeedsRex()) {
- is_twobyte_form = true;
- } else if (!src1.NeedsRex()) {
- return vmulpd(dst, src2, src1);
- }
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
- X86_64ManagedRegister vvvv_reg =
- X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
- if (is_twobyte_form) {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- } else {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
- /*X=*/ false,
- src2.NeedsRex(),
- SET_VEX_M_0F);
- ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- }
- EmitUint8(ByteZero);
- EmitUint8(ByteOne);
- if (!is_twobyte_form) {
- EmitUint8(ByteTwo);
- }
- EmitUint8(0x59);
- EmitXmmRegisterOperand(dst.LowBits(), src2);
+ EmitVecArithAndLogicalOperation(
+ dst, src1, src2, /*opcode=*/ 0x59, SET_VEX_PP_66, /*is_commutative=*/ true);
}
void X86_64Assembler::divpd(XmmRegister dst, XmmRegister src) {
@@ -1329,32 +1156,7 @@ void X86_64Assembler::divpd(XmmRegister dst, XmmRegister src) {
void X86_64Assembler::vdivpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
- DCHECK(CpuHasAVXorAVX2FeatureFlag());
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- bool is_twobyte_form = false;
- uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
- if (!src2.NeedsRex()) {
- is_twobyte_form = true;
- }
- ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
- X86_64ManagedRegister vvvv_reg =
- X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
- if (is_twobyte_form) {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- } else {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
- /*X=*/ false,
- src2.NeedsRex(),
- SET_VEX_M_0F);
- ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- }
- EmitUint8(ByteZero);
- EmitUint8(ByteOne);
- if (!is_twobyte_form) {
- EmitUint8(ByteTwo);
- }
- EmitUint8(0x5E);
- EmitXmmRegisterOperand(dst.LowBits(), src2);
+ EmitVecArithAndLogicalOperation(dst, src1, src2, /*opcode=*/ 0x5E, SET_VEX_PP_66);
}
@@ -1531,34 +1333,8 @@ void X86_64Assembler::paddb(XmmRegister dst, XmmRegister src) {
void X86_64Assembler::vpaddb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
- DCHECK(CpuHasAVXorAVX2FeatureFlag());
- uint8_t ByteOne = 0x00, ByteZero = 0x00, ByteTwo = 0x00;
- bool is_twobyte_form = false;
- if (!add_right.NeedsRex()) {
- is_twobyte_form = true;
- } else if (!add_left.NeedsRex()) {
- return vpaddb(dst, add_right, add_left);
- }
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
- X86_64ManagedRegister vvvv_reg =
- X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
- if (is_twobyte_form) {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- } else {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
- /*X=*/ false,
- add_right.NeedsRex(),
- SET_VEX_M_0F);
- ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- }
- EmitUint8(ByteZero);
- EmitUint8(ByteOne);
- if (!is_twobyte_form) {
- EmitUint8(ByteTwo);
- }
- EmitUint8(0xFC);
- EmitXmmRegisterOperand(dst.LowBits(), add_right);
+ EmitVecArithAndLogicalOperation(
+ dst, add_left, add_right, /*opcode=*/ 0xFC, SET_VEX_PP_66, /*is_commutative=*/ true);
}
@@ -1573,32 +1349,7 @@ void X86_64Assembler::psubb(XmmRegister dst, XmmRegister src) {
void X86_64Assembler::vpsubb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
- DCHECK(CpuHasAVXorAVX2FeatureFlag());
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- bool is_twobyte_form = false;
- uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
- if (!add_right.NeedsRex()) {
- is_twobyte_form = true;
- }
- ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
- X86_64ManagedRegister vvvv_reg =
- X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
- if (is_twobyte_form) {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- } else {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
- /*X=*/ false,
- add_right.NeedsRex(),
- SET_VEX_M_0F);
- ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- }
- EmitUint8(ByteZero);
- EmitUint8(ByteOne);
- if (!is_twobyte_form) {
- EmitUint8(ByteTwo);
- }
- EmitUint8(0xF8);
- EmitXmmRegisterOperand(dst.LowBits(), add_right);
+ EmitVecArithAndLogicalOperation(dst, add_left, add_right, /*opcode=*/ 0xF8, SET_VEX_PP_66);
}
@@ -1612,34 +1363,8 @@ void X86_64Assembler::paddw(XmmRegister dst, XmmRegister src) {
}
void X86_64Assembler::vpaddw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
- DCHECK(CpuHasAVXorAVX2FeatureFlag());
- bool is_twobyte_form = false;
- uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
- if (!add_right.NeedsRex()) {
- is_twobyte_form = true;
- } else if (!add_left.NeedsRex()) {
- return vpaddw(dst, add_right, add_left);
- }
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
- X86_64ManagedRegister vvvv_reg =
- X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
- if (is_twobyte_form) {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- } else {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
- /*X=*/ false,
- add_right.NeedsRex(),
- SET_VEX_M_0F);
- ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- }
- EmitUint8(ByteZero);
- EmitUint8(ByteOne);
- if (!is_twobyte_form) {
- EmitUint8(ByteTwo);
- }
- EmitUint8(0xFD);
- EmitXmmRegisterOperand(dst.LowBits(), add_right);
+ EmitVecArithAndLogicalOperation(
+ dst, add_left, add_right, /*opcode=*/ 0xFD, SET_VEX_PP_66, /*is_commutative=*/ true);
}
@@ -1653,32 +1378,7 @@ void X86_64Assembler::psubw(XmmRegister dst, XmmRegister src) {
}
void X86_64Assembler::vpsubw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
- DCHECK(CpuHasAVXorAVX2FeatureFlag());
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- bool is_twobyte_form = false;
- uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
- if (!add_right.NeedsRex()) {
- is_twobyte_form = true;
- }
- ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
- X86_64ManagedRegister vvvv_reg =
- X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
- if (is_twobyte_form) {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- } else {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
- /*X=*/ false,
- add_right.NeedsRex(),
- SET_VEX_M_0F);
- ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- }
- EmitUint8(ByteZero);
- EmitUint8(ByteOne);
- if (!is_twobyte_form) {
- EmitUint8(ByteTwo);
- }
- EmitUint8(0xF9);
- EmitXmmRegisterOperand(dst.LowBits(), add_right);
+ EmitVecArithAndLogicalOperation(dst, add_left, add_right, /*opcode=*/ 0xF9, SET_VEX_PP_66);
}
@@ -1692,34 +1392,8 @@ void X86_64Assembler::pmullw(XmmRegister dst, XmmRegister src) {
}
void X86_64Assembler::vpmullw(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
- DCHECK(CpuHasAVXorAVX2FeatureFlag());
- bool is_twobyte_form = false;
- uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
- if (!src2.NeedsRex()) {
- is_twobyte_form = true;
- } else if (!src1.NeedsRex()) {
- return vpmullw(dst, src2, src1);
- }
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
- X86_64ManagedRegister vvvv_reg =
- X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
- if (is_twobyte_form) {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- } else {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
- /*X=*/ false,
- src2.NeedsRex(),
- SET_VEX_M_0F);
- ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- }
- EmitUint8(ByteZero);
- EmitUint8(ByteOne);
- if (!is_twobyte_form) {
- EmitUint8(ByteTwo);
- }
- EmitUint8(0xD5);
- EmitXmmRegisterOperand(dst.LowBits(), src2);
+ EmitVecArithAndLogicalOperation(
+ dst, src1, src2, /*opcode=*/ 0xD5, SET_VEX_PP_66, /*is_commutative=*/ true);
}
void X86_64Assembler::paddd(XmmRegister dst, XmmRegister src) {
@@ -1732,34 +1406,8 @@ void X86_64Assembler::paddd(XmmRegister dst, XmmRegister src) {
}
void X86_64Assembler::vpaddd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
- DCHECK(CpuHasAVXorAVX2FeatureFlag());
- bool is_twobyte_form = false;
- uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
- if (!add_right.NeedsRex()) {
- is_twobyte_form = true;
- } else if (!add_left.NeedsRex()) {
- return vpaddd(dst, add_right, add_left);
- }
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
- X86_64ManagedRegister vvvv_reg =
- X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
- if (is_twobyte_form) {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- } else {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
- /*X=*/ false,
- add_right.NeedsRex(),
- SET_VEX_M_0F);
- ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- }
- EmitUint8(ByteZero);
- EmitUint8(ByteOne);
- if (!is_twobyte_form) {
- EmitUint8(ByteTwo);
- }
- EmitUint8(0xFE);
- EmitXmmRegisterOperand(dst.LowBits(), add_right);
+ EmitVecArithAndLogicalOperation(
+ dst, add_left, add_right, /*opcode=*/ 0xFE, SET_VEX_PP_66, /*is_commutative=*/ true);
}
void X86_64Assembler::psubd(XmmRegister dst, XmmRegister src) {
@@ -1812,34 +1460,8 @@ void X86_64Assembler::paddq(XmmRegister dst, XmmRegister src) {
void X86_64Assembler::vpaddq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
- DCHECK(CpuHasAVXorAVX2FeatureFlag());
- bool is_twobyte_form = false;
- uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
- if (!add_right.NeedsRex()) {
- is_twobyte_form = true;
- } else if (!add_left.NeedsRex()) {
- return vpaddq(dst, add_right, add_left);
- }
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
- X86_64ManagedRegister vvvv_reg =
- X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
- if (is_twobyte_form) {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- } else {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
- /*X=*/ false,
- add_right.NeedsRex(),
- SET_VEX_M_0F);
- ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- }
- EmitUint8(ByteZero);
- EmitUint8(ByteOne);
- if (!is_twobyte_form) {
- EmitUint8(ByteTwo);
- }
- EmitUint8(0xD4);
- EmitXmmRegisterOperand(dst.LowBits(), add_right);
+ EmitVecArithAndLogicalOperation(
+ dst, add_left, add_right, /*opcode=*/ 0xD4, SET_VEX_PP_66, /*is_commutative=*/ true);
}
@@ -1853,32 +1475,7 @@ void X86_64Assembler::psubq(XmmRegister dst, XmmRegister src) {
}
void X86_64Assembler::vpsubq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
- DCHECK(CpuHasAVXorAVX2FeatureFlag());
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- bool is_twobyte_form = false;
- uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
- if (!add_right.NeedsRex()) {
- is_twobyte_form = true;
- }
- ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
- X86_64ManagedRegister vvvv_reg =
- X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
- if (is_twobyte_form) {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- } else {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
- /*X=*/ false,
- add_right.NeedsRex(),
- SET_VEX_M_0F);
- ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- }
- EmitUint8(ByteZero);
- EmitUint8(ByteOne);
- if (!is_twobyte_form) {
- EmitUint8(ByteTwo);
- }
- EmitUint8(0xFB);
- EmitXmmRegisterOperand(dst.LowBits(), add_right);
+ EmitVecArithAndLogicalOperation(dst, add_left, add_right, /*opcode=*/ 0xFB, SET_VEX_PP_66);
}
@@ -1943,32 +1540,7 @@ void X86_64Assembler::psubsb(XmmRegister dst, XmmRegister src) {
void X86_64Assembler::vpsubd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
- DCHECK(CpuHasAVXorAVX2FeatureFlag());
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- bool is_twobyte_form = false;
- uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
- if (!add_right.NeedsRex()) {
- is_twobyte_form = true;
- }
- ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
- X86_64ManagedRegister vvvv_reg =
- X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
- if (is_twobyte_form) {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- } else {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
- /*X=*/ false,
- add_right.NeedsRex(),
- SET_VEX_M_0F);
- ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- }
- EmitUint8(ByteZero);
- EmitUint8(ByteOne);
- if (!is_twobyte_form) {
- EmitUint8(ByteTwo);
- }
- EmitUint8(0xFA);
- EmitXmmRegisterOperand(dst.LowBits(), add_right);
+ EmitVecArithAndLogicalOperation(dst, add_left, add_right, /*opcode=*/ 0xFA, SET_VEX_PP_66);
}
@@ -2349,98 +1921,20 @@ void X86_64Assembler::pxor(XmmRegister dst, XmmRegister src) {
/* VEX.128.66.0F.WIG EF /r VPXOR xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vpxor(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
- DCHECK(CpuHasAVXorAVX2FeatureFlag());
- bool is_twobyte_form = false;
- uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
- if (!src2.NeedsRex()) {
- is_twobyte_form = true;
- } else if (!src1.NeedsRex()) {
- return vpxor(dst, src2, src1);
- }
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- X86_64ManagedRegister vvvv_reg =
- X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
- ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
- if (is_twobyte_form) {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- } else {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
- /*X=*/ false,
- src2.NeedsRex(),
- SET_VEX_M_0F);
- ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- }
- EmitUint8(ByteZero);
- EmitUint8(ByteOne);
- if (!is_twobyte_form) {
- EmitUint8(ByteTwo);
- }
- EmitUint8(0xEF);
- EmitXmmRegisterOperand(dst.LowBits(), src2);
+ EmitVecArithAndLogicalOperation(
+ dst, src1, src2, /*opcode=*/ 0xEF, SET_VEX_PP_66, /*is_commutative=*/ true);
}
/* VEX.128.0F.WIG 57 /r VXORPS xmm1,xmm2, xmm3/m128 */
void X86_64Assembler::vxorps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
- DCHECK(CpuHasAVXorAVX2FeatureFlag());
- bool is_twobyte_form = false;
- uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
- if (!src2.NeedsRex()) {
- is_twobyte_form = true;
- } else if (!src1.NeedsRex()) {
- return vxorps(dst, src2, src1);
- }
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- X86_64ManagedRegister vvvv_reg =
- X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
- ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
- if (is_twobyte_form) {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
- } else {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
- /*X=*/ false,
- src2.NeedsRex(),
- SET_VEX_M_0F);
- ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
- }
- EmitUint8(ByteZero);
- EmitUint8(ByteOne);
- if (!is_twobyte_form) {
- EmitUint8(ByteTwo);
- }
- EmitUint8(0x57);
- EmitXmmRegisterOperand(dst.LowBits(), src2);
+ EmitVecArithAndLogicalOperation(
+ dst, src1, src2, /*opcode=*/ 0x57, SET_VEX_PP_NONE, /*is_commutative=*/ true);
}
/* VEX.128.66.0F.WIG 57 /r VXORPD xmm1,xmm2, xmm3/m128 */
void X86_64Assembler::vxorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
- DCHECK(CpuHasAVXorAVX2FeatureFlag());
- bool is_twobyte_form = false;
- uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
- if (!src2.NeedsRex()) {
- is_twobyte_form = true;
- } else if (!src1.NeedsRex()) {
- return vxorpd(dst, src2, src1);
- }
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- X86_64ManagedRegister vvvv_reg =
- X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
- ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
- if (is_twobyte_form) {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- } else {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
- /*X=*/ false,
- src2.NeedsRex(),
- SET_VEX_M_0F);
- ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- }
- EmitUint8(ByteZero);
- EmitUint8(ByteOne);
- if (!is_twobyte_form) {
- EmitUint8(ByteTwo);
- }
- EmitUint8(0x57);
- EmitXmmRegisterOperand(dst.LowBits(), src2);
+ EmitVecArithAndLogicalOperation(
+ dst, src1, src2, /*opcode=*/ 0x57, SET_VEX_PP_66, /*is_commutative=*/ true);
}
void X86_64Assembler::andpd(XmmRegister dst, const Address& src) {
@@ -2480,98 +1974,20 @@ void X86_64Assembler::pand(XmmRegister dst, XmmRegister src) {
/* VEX.128.66.0F.WIG DB /r VPAND xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vpand(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
- DCHECK(CpuHasAVXorAVX2FeatureFlag());
- bool is_twobyte_form = false;
- uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
- if (!src2.NeedsRex()) {
- is_twobyte_form = true;
- } else if (!src1.NeedsRex()) {
- return vpand(dst, src2, src1);
- }
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- X86_64ManagedRegister vvvv_reg =
- X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
- ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
- if (is_twobyte_form) {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- } else {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
- /*X=*/ false,
- src2.NeedsRex(),
- SET_VEX_M_0F);
- ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- }
- EmitUint8(ByteZero);
- EmitUint8(ByteOne);
- if (!is_twobyte_form) {
- EmitUint8(ByteTwo);
- }
- EmitUint8(0xDB);
- EmitXmmRegisterOperand(dst.LowBits(), src2);
+ EmitVecArithAndLogicalOperation(
+ dst, src1, src2, /*opcode=*/ 0xDB, SET_VEX_PP_66, /*is_commutative=*/ true);
}
/* VEX.128.0F 54 /r VANDPS xmm1,xmm2, xmm3/m128 */
void X86_64Assembler::vandps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
- DCHECK(CpuHasAVXorAVX2FeatureFlag());
- bool is_twobyte_form = false;
- uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
- if (!src2.NeedsRex()) {
- is_twobyte_form = true;
- } else if (!src1.NeedsRex()) {
- return vandps(dst, src2, src1);
- }
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- X86_64ManagedRegister vvvv_reg =
- X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
- ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
- if (is_twobyte_form) {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
- } else {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
- /*X=*/ false,
- src2.NeedsRex(),
- SET_VEX_M_0F);
- ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
- }
- EmitUint8(ByteZero);
- EmitUint8(ByteOne);
- if (!is_twobyte_form) {
- EmitUint8(ByteTwo);
- }
- EmitUint8(0x54);
- EmitXmmRegisterOperand(dst.LowBits(), src2);
+ EmitVecArithAndLogicalOperation(
+ dst, src1, src2, /*opcode=*/ 0x54, SET_VEX_PP_NONE, /*is_commutative=*/ true);
}
/* VEX.128.66.0F 54 /r VANDPD xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vandpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
- DCHECK(CpuHasAVXorAVX2FeatureFlag());
- bool is_twobyte_form = false;
- uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
- if (!src2.NeedsRex()) {
- is_twobyte_form = true;
- } else if (!src1.NeedsRex()) {
- return vandpd(dst, src2, src1);
- }
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- X86_64ManagedRegister vvvv_reg =
- X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
- ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
- if (is_twobyte_form) {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- } else {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
- /*X=*/ false,
- src2.NeedsRex(),
- SET_VEX_M_0F);
- ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- }
- EmitUint8(ByteZero);
- EmitUint8(ByteOne);
- if (!is_twobyte_form) {
- EmitUint8(ByteTwo);
- }
- EmitUint8(0x54);
- EmitXmmRegisterOperand(dst.LowBits(), src2);
+ EmitVecArithAndLogicalOperation(
+ dst, src1, src2, /*opcode=*/ 0x54, SET_VEX_PP_66, /*is_commutative=*/ true);
}
void X86_64Assembler::andn(CpuRegister dst, CpuRegister src1, CpuRegister src2) {
@@ -2621,92 +2037,17 @@ void X86_64Assembler::pandn(XmmRegister dst, XmmRegister src) {
/* VEX.128.66.0F.WIG DF /r VPANDN xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vpandn(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
- DCHECK(CpuHasAVXorAVX2FeatureFlag());
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- bool is_twobyte_form = false;
- uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
- if (!src2.NeedsRex()) {
- is_twobyte_form = true;
- }
- X86_64ManagedRegister vvvv_reg =
- X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
- ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
- if (is_twobyte_form) {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- } else {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
- /*X=*/ false,
- src2.NeedsRex(),
- SET_VEX_M_0F);
- ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- }
- EmitUint8(ByteZero);
- EmitUint8(ByteOne);
- if (!is_twobyte_form) {
- EmitUint8(ByteTwo);
- }
- EmitUint8(0xDF);
- EmitXmmRegisterOperand(dst.LowBits(), src2);
+ EmitVecArithAndLogicalOperation(dst, src1, src2, /*opcode=*/ 0xDF, SET_VEX_PP_66);
}
/* VEX.128.0F 55 /r VANDNPS xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vandnps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
- DCHECK(CpuHasAVXorAVX2FeatureFlag());
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- bool is_twobyte_form = false;
- uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
- if (!src2.NeedsRex()) {
- is_twobyte_form = true;
- }
- X86_64ManagedRegister vvvv_reg =
- X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
- ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
- if (is_twobyte_form) {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
- } else {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
- /*X=*/ false,
- src2.NeedsRex(),
- SET_VEX_M_0F);
- ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
- }
- EmitUint8(ByteZero);
- EmitUint8(ByteOne);
- if (!is_twobyte_form) {
- EmitUint8(ByteTwo);
- }
- EmitUint8(0x55);
- EmitXmmRegisterOperand(dst.LowBits(), src2);
+ EmitVecArithAndLogicalOperation(dst, src1, src2, /*opcode=*/ 0x55, SET_VEX_PP_NONE);
}
/* VEX.128.66.0F 55 /r VANDNPD xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vandnpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
- DCHECK(CpuHasAVXorAVX2FeatureFlag());
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- bool is_twobyte_form = false;
- uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
- if (!src2.NeedsRex()) {
- is_twobyte_form = true;
- }
- X86_64ManagedRegister vvvv_reg =
- X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
- ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
- if (is_twobyte_form) {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- } else {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
- /*X=*/ false,
- src2.NeedsRex(),
- SET_VEX_M_0F);
- ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- }
- EmitUint8(ByteZero);
- EmitUint8(ByteOne);
- if (!is_twobyte_form) {
- EmitUint8(ByteTwo);
- }
- EmitUint8(0x55);
- EmitXmmRegisterOperand(dst.LowBits(), src2);
+ EmitVecArithAndLogicalOperation(dst, src1, src2, /*opcode=*/ 0x55, SET_VEX_PP_66);
}
void X86_64Assembler::orpd(XmmRegister dst, XmmRegister src) {
@@ -2737,98 +2078,20 @@ void X86_64Assembler::por(XmmRegister dst, XmmRegister src) {
/* VEX.128.66.0F.WIG EB /r VPOR xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vpor(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
- DCHECK(CpuHasAVXorAVX2FeatureFlag());
- bool is_twobyte_form = false;
- uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
- if (!src2.NeedsRex()) {
- is_twobyte_form = true;
- } else if (!src1.NeedsRex()) {
- return vpor(dst, src2, src1);
- }
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- X86_64ManagedRegister vvvv_reg =
- X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
- ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
- if (is_twobyte_form) {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- } else {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
- /*X=*/ false,
- src2.NeedsRex(),
- SET_VEX_M_0F);
- ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- }
- EmitUint8(ByteZero);
- EmitUint8(ByteOne);
- if (!is_twobyte_form) {
- EmitUint8(ByteTwo);
- }
- EmitUint8(0xEB);
- EmitXmmRegisterOperand(dst.LowBits(), src2);
+ EmitVecArithAndLogicalOperation(
+ dst, src1, src2, /*opcode=*/ 0xEB, SET_VEX_PP_66, /*is_commutative=*/ true);
}
/* VEX.128.0F 56 /r VORPS xmm1,xmm2, xmm3/m128 */
void X86_64Assembler::vorps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
- DCHECK(CpuHasAVXorAVX2FeatureFlag());
- bool is_twobyte_form = false;
- uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
- if (!src2.NeedsRex()) {
- is_twobyte_form = true;
- } else if (!src1.NeedsRex()) {
- return vorps(dst, src2, src1);
- }
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- X86_64ManagedRegister vvvv_reg =
- X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
- ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
- if (is_twobyte_form) {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
- } else {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
- /*X=*/ false,
- src2.NeedsRex(),
- SET_VEX_M_0F);
- ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
- }
- EmitUint8(ByteZero);
- EmitUint8(ByteOne);
- if (!is_twobyte_form) {
- EmitUint8(ByteTwo);
- }
- EmitUint8(0x56);
- EmitXmmRegisterOperand(dst.LowBits(), src2);
+ EmitVecArithAndLogicalOperation(
+ dst, src1, src2, /*opcode=*/ 0x56, SET_VEX_PP_NONE, /*is_commutative=*/ true);
}
/* VEX.128.66.0F 56 /r VORPD xmm1,xmm2, xmm3/m128 */
void X86_64Assembler::vorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
- DCHECK(CpuHasAVXorAVX2FeatureFlag());
- bool is_twobyte_form = false;
- uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
- if (!src2.NeedsRex()) {
- is_twobyte_form = true;
- } else if (!src1.NeedsRex()) {
- return vorpd(dst, src2, src1);
- }
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- X86_64ManagedRegister vvvv_reg =
- X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
- ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
- if (is_twobyte_form) {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- } else {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
- /*X=*/ false,
- src2.NeedsRex(),
- SET_VEX_M_0F);
- ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- }
- EmitUint8(ByteZero);
- EmitUint8(ByteOne);
- if (!is_twobyte_form) {
- EmitUint8(ByteTwo);
- }
- EmitUint8(0x56);
- EmitXmmRegisterOperand(dst.LowBits(), src2);
+ EmitVecArithAndLogicalOperation(
+ dst, src1, src2, /*opcode=*/ 0x56, SET_VEX_PP_66, /*is_commutative=*/ true);
}
void X86_64Assembler::pavgb(XmmRegister dst, XmmRegister src) {
@@ -2868,34 +2131,8 @@ void X86_64Assembler::pmaddwd(XmmRegister dst, XmmRegister src) {
}
void X86_64Assembler::vpmaddwd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
- DCHECK(CpuHasAVXorAVX2FeatureFlag());
- bool is_twobyte_form = false;
- uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
- if (!src2.NeedsRex()) {
- is_twobyte_form = true;
- } else if (!src1.NeedsRex()) {
- return vpmaddwd(dst, src2, src1);
- }
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
- X86_64ManagedRegister vvvv_reg =
- X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
- if (is_twobyte_form) {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- } else {
- ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
- /*X=*/ false,
- src2.NeedsRex(),
- SET_VEX_M_0F);
- ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
- }
- EmitUint8(ByteZero);
- EmitUint8(ByteOne);
- if (!is_twobyte_form) {
- EmitUint8(ByteTwo);
- }
- EmitUint8(0xF5);
- EmitXmmRegisterOperand(dst.LowBits(), src2);
+ EmitVecArithAndLogicalOperation(
+ dst, src1, src2, /*opcode=*/ 0xF5, SET_VEX_PP_66, /*is_commutative=*/ true);
}
void X86_64Assembler::phaddw(XmmRegister dst, XmmRegister src) {
@@ -5529,5 +4766,35 @@ uint8_t X86_64Assembler::EmitVexPrefixByteTwo(bool W,
return vex_prefix;
}
+void X86_64Assembler::EmitVecArithAndLogicalOperation(XmmRegister dst,
+ XmmRegister src1,
+ XmmRegister src2,
+ uint8_t opcode,
+ int vex_pp,
+ bool is_commutative) {
+ if (is_commutative && src2.NeedsRex() && !src1.NeedsRex()) {
+ return EmitVecArithAndLogicalOperation(dst, src2, src1, opcode, vex_pp, is_commutative);
+ }
+ DCHECK(CpuHasAVXorAVX2FeatureFlag());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ X86_64ManagedRegister vvvv_reg = X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
+ bool is_twobyte_form = !src2.NeedsRex();
+ uint8_t byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
+ uint8_t byte_one, byte_two;
+ if (is_twobyte_form) {
+ byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, vex_pp);
+ } else {
+ byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), /*X=*/ false, src2.NeedsRex(), SET_VEX_M_0F);
+ byte_two = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, vex_pp);
+ }
+ EmitUint8(byte_zero);
+ EmitUint8(byte_one);
+ if (!is_twobyte_form) {
+ EmitUint8(byte_two);
+ }
+ EmitUint8(opcode);
+ EmitXmmRegisterOperand(dst.LowBits(), src2);
+}
+
} // namespace x86_64
} // namespace art
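
Note: to see what the consolidated helper emits on its short path, here is a worked example as a standalone sketch (hypothetical function, not the assembler's API; byte layout per the Intel VEX specification, with pp = 0 meaning no SIMD prefix):

#include <cstdint>
#include <cstdio>

// Payload byte of the two-byte VEX prefix:
// [inverted REX.R of dst | inverted vvvv encoding src1 | L (0 = 128-bit) | pp].
uint8_t TwoByteVexPayload(bool dst_needs_rex, int src1, int pp) {
  uint8_t r_bar = dst_needs_rex ? 0u : 1u;
  uint8_t vvvv_bar = static_cast<uint8_t>(~src1) & 0xFu;
  return static_cast<uint8_t>((r_bar << 7) | (vvvv_bar << 3) | pp);
}

int main() {
  // vaddps xmm1, xmm2, xmm3: opcode 0x58, ModRM 0xCB (mod 11, reg 001, rm 011).
  // Expected full encoding: C5 E8 58 CB.
  std::printf("C5 %02X 58 CB\n", TwoByteVexPayload(/*dst_needs_rex=*/ false, /*src1=*/ 2, /*pp=*/ 0));
  return 0;
}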
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 5f6d754f8d..2c3b3c44ee 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -1185,6 +1185,13 @@ class X86_64Assembler final : public Assembler {
int SET_VEX_L,
int SET_VEX_PP);
+ void EmitVecArithAndLogicalOperation(XmmRegister dst,
+ XmmRegister src1,
+ XmmRegister src2,
+ uint8_t opcode,
+ int vex_pp,
+ bool is_commutative = false);
+
// Helper function to emit a shorter variant of XCHG if at least one operand is RAX/EAX/AX.
bool try_xchg_rax(CpuRegister dst,
CpuRegister src,