| field | value |
|---|---|
| author | 2025-01-03 18:01:28 -0800 |
| committer | 2025-01-03 18:01:28 -0800 |
| commit | fc20f5dacc66c03d0634bb79bf9318e9f8467dcc (patch) |
| tree | 1c0e3df6cff6dbf1a6e24302c7ffe8a4739ee68a /compiler |
| parent | ef26438a09c6c19d51be82555a2870b1bfe2d50f (diff) |
| parent | d34713915fa125c29c0db5500a533b7bc0f7167e (diff) |
Snap for 12868993 from d34713915fa125c29c0db5500a533b7bc0f7167e to 25Q2-release
Change-Id: Ibd5023fa2ed3e2be8668fb9c81f31b21630d2a3d
Diffstat (limited to 'compiler')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | compiler/optimizing/intrinsics.h | 2 |
| -rw-r--r-- | compiler/optimizing/intrinsics_arm64.cc | 2 |
| -rw-r--r-- | compiler/optimizing/intrinsics_arm_vixl.cc | 2 |
| -rw-r--r-- | compiler/optimizing/intrinsics_riscv64.cc | 2 |
| -rw-r--r-- | compiler/optimizing/intrinsics_utils.h | 2 |
| -rw-r--r-- | compiler/optimizing/intrinsics_x86_64.cc | 2 |
| -rw-r--r-- | compiler/optimizing/superblock_cloner.cc | 64 |
| -rw-r--r-- | compiler/optimizing/superblock_cloner.h | 50 |
| -rw-r--r-- | compiler/optimizing/superblock_cloner_test.cc | 46 |
| -rw-r--r-- | compiler/utils/x86_64/assembler_x86_64.cc | 891 |
| -rw-r--r-- | compiler/utils/x86_64/assembler_x86_64.h | 7 |
11 files changed, 103 insertions, 967 deletions
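For orientation before the large assembler hunks below: the x86-64 change collapses a dozen near-identical VEX-encoded vector emitters (vaddps, vmulpd, vpand, vpxor, vorps, ...) into one shared helper, EmitVecArithAndLogicalOperation, whose definition appears at the end of the assembler_x86_64.cc diff. The stand-alone C++ sketch below models only that helper's decision logic — swap the sources of a commutative operation so the shorter two-byte VEX prefix stays available, then pick the two- or three-byte prefix form — using made-up stub types (`Xmm`, `EmitVecOp`, placeholder prefix bytes); the real register classes, SET_VEX_* constants and EmitVexPrefixByte* helpers are ART internals shown in the diff and are not reproduced here.

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Hypothetical stand-in for ART's XmmRegister: xmm8-xmm15 need a REX/VEX extension bit.
struct Xmm {
  int index;
  bool NeedsRex() const { return index >= 8; }
  int LowBits() const { return index & 7; }
};

constexpr uint8_t kVexPpNone = 0x0;  // stands in for SET_VEX_PP_NONE
constexpr uint8_t kVexPp66 = 0x1;    // stands in for SET_VEX_PP_66

// Sketch of the shared helper's control flow (not a complete VEX encoder).
void EmitVecOp(std::vector<uint8_t>& out, Xmm dst, Xmm src1, Xmm src2,
               uint8_t opcode, uint8_t vex_pp, bool is_commutative = false) {
  // Commutative ops swap sources when only src2 needs the extension bit,
  // so the shorter two-byte prefix form can still be used.
  if (is_commutative && src2.NeedsRex() && !src1.NeedsRex()) {
    return EmitVecOp(out, dst, src2, src1, opcode, vex_pp, is_commutative);
  }
  const bool two_byte_form = !src2.NeedsRex();
  if (two_byte_form) {
    out.push_back(0xC5);    // two-byte VEX prefix
    out.push_back(vex_pp);  // placeholder: the real code packs R, vvvv (= src1), L and pp here
  } else {
    out.push_back(0xC4);    // three-byte VEX prefix
    out.push_back(0x01);    // placeholder: the real code packs R, X, B and the 0F opcode map here
    out.push_back(vex_pp);  // placeholder: the real code packs W, vvvv (= src1), L and pp here
  }
  out.push_back(opcode);  // e.g. 0x58 for vaddps
  out.push_back(static_cast<uint8_t>(0xC0 | (dst.LowBits() << 3) | src2.LowBits()));  // reg-reg ModRM
}

int main() {
  std::vector<uint8_t> buf;
  // src2 = xmm9 needs the extension bit but src1 = xmm2 does not, so the
  // commutative path swaps them and the two-byte form is chosen.
  EmitVecOp(buf, /*dst=*/{1}, /*src1=*/{2}, /*src2=*/{9}, /*opcode=*/0x58, kVexPpNone,
            /*is_commutative=*/true);
  for (uint8_t b : buf) std::printf("%02x ", b);
  std::printf("\n");
  return 0;
}
```

With the helper in place, each public emitter in the diff becomes a one-line delegation — for example vaddps now passes opcode 0x58, SET_VEX_PP_NONE and is_commutative = true — which accounts for most of the shrinkage the diffstat shows for assembler_x86_64.cc.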
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 7a27b2506b..4f164e10c6 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -270,7 +270,7 @@ class VarHandleOptimizations : public IntrinsicOptimizations {
   INTRINSIC_OPTIMIZATION(DoNotIntrinsify, 0);  // One of the checks is statically known to fail.
   INTRINSIC_OPTIMIZATION(SkipObjectNullCheck, 1);  // Not applicable for static fields.
-  // Use known `VarHandle` from the boot image. To apply this optimization, the following
+  // Use known `VarHandle` from the boot/app image. To apply this optimization, the following
   // `VarHandle` checks must pass based on static analysis:
   //  - `VarHandle` type check (must match the coordinate count),
   //  - access mode check,
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index d12b2f7981..a4463cb248 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -4863,7 +4863,7 @@ static void GenerateVarHandleTarget(HInvoke* invoke,
   if (expected_coordinates_count <= 1u) {
     if (VarHandleOptimizations(invoke).GetUseKnownImageVarHandle()) {
       ScopedObjectAccess soa(Thread::Current());
-      ArtField* target_field = GetBootImageVarHandleField(invoke);
+      ArtField* target_field = GetImageVarHandleField(invoke);
       if (expected_coordinates_count == 0u) {
         ObjPtr<mirror::Class> declaring_class = target_field->GetDeclaringClass();
         if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(declaring_class)) {
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 04793b4f66..db7457a68b 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -4597,7 +4597,7 @@ static void GenerateVarHandleTarget(HInvoke* invoke,
   if (expected_coordinates_count <= 1u) {
     if (VarHandleOptimizations(invoke).GetUseKnownImageVarHandle()) {
       ScopedObjectAccess soa(Thread::Current());
-      ArtField* target_field = GetBootImageVarHandleField(invoke);
+      ArtField* target_field = GetImageVarHandleField(invoke);
       if (expected_coordinates_count == 0u) {
         ObjPtr<mirror::Class> declaring_class = target_field->GetDeclaringClass();
         if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(declaring_class)) {
diff --git a/compiler/optimizing/intrinsics_riscv64.cc b/compiler/optimizing/intrinsics_riscv64.cc
index 32816b5f4b..65776236c1 100644
--- a/compiler/optimizing/intrinsics_riscv64.cc
+++ b/compiler/optimizing/intrinsics_riscv64.cc
@@ -3774,7 +3774,7 @@ static void GenerateVarHandleTarget(HInvoke* invoke,
   if (expected_coordinates_count <= 1u) {
     if (VarHandleOptimizations(invoke).GetUseKnownImageVarHandle()) {
       ScopedObjectAccess soa(Thread::Current());
-      ArtField* target_field = GetBootImageVarHandleField(invoke);
+      ArtField* target_field = GetImageVarHandleField(invoke);
       if (expected_coordinates_count == 0u) {
         ObjPtr<mirror::Class> declaring_class = target_field->GetDeclaringClass();
         if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(declaring_class)) {
diff --git a/compiler/optimizing/intrinsics_utils.h b/compiler/optimizing/intrinsics_utils.h
index 6c08cea3f8..c2f32ae708 100644
--- a/compiler/optimizing/intrinsics_utils.h
+++ b/compiler/optimizing/intrinsics_utils.h
@@ -207,7 +207,7 @@ static inline DataType::Type GetVarHandleExpectedValueType(HInvoke* invoke,
   }
 }

-static inline ArtField* GetBootImageVarHandleField(HInvoke* invoke)
+static inline ArtField* GetImageVarHandleField(HInvoke* invoke)
REQUIRES_SHARED(Locks::mutator_lock_) { DCHECK_LE(GetExpectedVarHandleCoordinatesCount(invoke), 1u); DCHECK(VarHandleOptimizations(invoke).GetUseKnownImageVarHandle()); diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index fd74f3f44d..18963bf135 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -4074,7 +4074,7 @@ static void GenerateVarHandleTarget(HInvoke* invoke, if (expected_coordinates_count <= 1u) { if (VarHandleOptimizations(invoke).GetUseKnownImageVarHandle()) { ScopedObjectAccess soa(Thread::Current()); - ArtField* target_field = GetBootImageVarHandleField(invoke); + ArtField* target_field = GetImageVarHandleField(invoke); if (expected_coordinates_count == 0u) { ObjPtr<mirror::Class> declaring_class = target_field->GetDeclaringClass(); __ movl(CpuRegister(target.object), diff --git a/compiler/optimizing/superblock_cloner.cc b/compiler/optimizing/superblock_cloner.cc index 276d2246cb..a7328a1218 100644 --- a/compiler/optimizing/superblock_cloner.cc +++ b/compiler/optimizing/superblock_cloner.cc @@ -227,40 +227,6 @@ void SuperblockCloner::RemapCopyInternalEdge(HBasicBlock* orig_block, } } -bool SuperblockCloner::IsRemapInfoForVersioning() const { - return remap_incoming_->empty() && - remap_orig_internal_->empty() && - remap_copy_internal_->empty(); -} - -void SuperblockCloner::CopyIncomingEdgesForVersioning() { - for (uint32_t orig_block_id : orig_bb_set_.Indexes()) { - HBasicBlock* orig_block = GetBlockById(orig_block_id); - size_t incoming_edge_count = 0; - for (HBasicBlock* orig_pred : orig_block->GetPredecessors()) { - uint32_t orig_pred_id = orig_pred->GetBlockId(); - if (IsInOrigBBSet(orig_pred_id)) { - continue; - } - - HBasicBlock* copy_block = GetBlockCopy(orig_block); - // This corresponds to the requirement on the order of predecessors: all the incoming - // edges must be seen before the internal ones. This is always true for natural loops. - // TODO: remove this requirement. - DCHECK_EQ(orig_block->GetPredecessorIndexOf(orig_pred), incoming_edge_count); - for (HInstructionIterator it(orig_block->GetPhis()); !it.Done(); it.Advance()) { - HPhi* orig_phi = it.Current()->AsPhi(); - HPhi* copy_phi = GetInstrCopy(orig_phi)->AsPhi(); - HInstruction* orig_phi_input = orig_phi->InputAt(incoming_edge_count); - // Add the corresponding input of the original phi to the copy one. - copy_phi->AddInput(orig_phi_input); - } - copy_block->AddPredecessor(orig_pred); - incoming_edge_count++; - } - } -} - // // Local versions of CF calculation/adjustment routines. // @@ -484,12 +450,6 @@ void SuperblockCloner::FindAndSetLocalAreaForAdjustments() { } void SuperblockCloner::RemapEdgesSuccessors() { - // By this stage all the blocks have been copied, copy phis - created with no inputs; - // no copy edges have been created so far. - if (IsRemapInfoForVersioning()) { - CopyIncomingEdgesForVersioning(); - } - // Redirect incoming edges. for (HEdge e : *remap_incoming_) { HBasicBlock* orig_block = GetBlockById(e.GetFrom()); @@ -897,7 +857,7 @@ bool SuperblockCloner::IsSubgraphClonable() const { return true; } -// Checks that loop unrolling/peeling/versioning is being conducted. +// Checks that loop unrolling/peeling is being conducted. bool SuperblockCloner::IsFastCase() const { // Check that all the basic blocks belong to the same loop. 
bool flag = false; @@ -914,15 +874,11 @@ bool SuperblockCloner::IsFastCase() const { } } - // Check that orig_bb_set_ corresponds to loop peeling/unrolling/versioning. + // Check that orig_bb_set_ corresponds to loop peeling/unrolling. if (common_loop_info == nullptr || !orig_bb_set_.SameBitsSet(&common_loop_info->GetBlocks())) { return false; } - if (IsRemapInfoForVersioning()) { - return true; - } - bool peeling_or_unrolling = false; HEdgeSet remap_orig_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); HEdgeSet remap_copy_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); @@ -1171,9 +1127,6 @@ HBasicBlock* LoopClonerHelper::DoLoopTransformationImpl(TransformationKind trans case TransformationKind::kUnrolling: oss<< "unrolling"; break; - case TransformationKind::kVersioning: - oss << "versioning"; - break; } oss << " was applied to the loop <" << loop_header->GetBlockId() << ">."; LOG(INFO) << oss.str(); @@ -1185,14 +1138,11 @@ HBasicBlock* LoopClonerHelper::DoLoopTransformationImpl(TransformationKind trans HEdgeSet remap_copy_internal(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); HEdgeSet remap_incoming(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); - // No remapping needed for loop versioning. - if (transformation != TransformationKind::kVersioning) { - CollectRemappingInfoForPeelUnroll(transformation == TransformationKind::kUnrolling, - loop_info_, - &remap_orig_internal, - &remap_copy_internal, - &remap_incoming); - } + CollectRemappingInfoForPeelUnroll(transformation == TransformationKind::kUnrolling, + loop_info_, + &remap_orig_internal, + &remap_copy_internal, + &remap_incoming); cloner_.SetSuccessorRemappingInfo(&remap_orig_internal, &remap_copy_internal, &remap_incoming); cloner_.Run(); diff --git a/compiler/optimizing/superblock_cloner.h b/compiler/optimizing/superblock_cloner.h index c327867342..d4db0b3852 100644 --- a/compiler/optimizing/superblock_cloner.h +++ b/compiler/optimizing/superblock_cloner.h @@ -90,8 +90,7 @@ inline bool IsEdgeValid(HEdge edge, HGraph* graph) { // fine grain manipulation with IR; data flow and graph properties are resolved/adjusted // automatically. The clone transformation is defined by specifying a set of basic blocks to copy // and a set of rules how to treat edges, remap their successors. By using this approach such -// optimizations as Branch Target Expansion, Loop Peeling, Loop Unrolling, Loop Versioning can be -// implemented. +// optimizations as Branch Target Expansion, Loop Peeling, Loop Unrolling can be implemented. // // The idea of the transformation is based on "Superblock cloning" technique described in the book // "Engineering a Compiler. Second Edition", Keith D. Cooper, Linda Torczon, Rice University @@ -163,7 +162,7 @@ class SuperblockCloner : public ValueObject { // // TODO: formally describe the criteria. // - // Loop peeling, unrolling and versioning satisfy the criteria. + // Loop peeling and unrolling satisfy the criteria. bool IsFastCase() const; // Runs the copy algorithm according to the description. @@ -299,18 +298,6 @@ class SuperblockCloner : public ValueObject { // Remaps copy internal edge to its origin, adjusts the phi inputs in orig_succ. void RemapCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ); - // Checks whether the edges remapping info corresponds to the subgraph versioning case: - // - none of the incoming edges are to be remapped (they are being duplicated). - // - none of the internal edges are to be remapped. 
- bool IsRemapInfoForVersioning() const; - - // Processes incoming edges for subgraph versioning case: for each incoming edge (X, Y) adds - // an edge (X, Y_1) where Y_1 = Copy(Y) and add corresponding phi input to copy phi. - // - // Note: such node X will now have two successors, its unconditional branch instruction - // will be invalid and should be adjusted to some conditional branch by the client code. - void CopyIncomingEdgesForVersioning(); - // // Local versions of control flow calculation/adjustment routines. // @@ -376,7 +363,7 @@ class SuperblockCloner : public ValueObject { DISALLOW_COPY_AND_ASSIGN(SuperblockCloner); }; -// Helper class to perform loop peeling/unrolling/versioning. +// Helper class to perform loop peeling/unrolling. // // This helper should be used when correspondence map between original and copied // basic blocks/instructions are demanded. @@ -456,40 +443,12 @@ class LoopClonerHelper : public ValueObject { return DoLoopTransformationImpl(TransformationKind::kUnrolling); } - // Perform loop versioning. - // - // Control flow of an example (ignoring critical edges splitting). - // - // Before After - // - // |B| |B| - // | | - // v v - // |1| |1|_________ - // | | | - // v v v - // |2|<-\ |2|<-\ |2A|<-\ - // / \ / / \ / / \ / - // v v/ | v/ | v/ - // | |3| | |3| | |3A| - // | | __________| - // | || - // v vv - // |4| |4| - // | | - // v v - // |E| |E| - HBasicBlock* DoVersioning() { - return DoLoopTransformationImpl(TransformationKind::kVersioning); - } - HLoopInformation* GetRegionToBeAdjusted() const { return cloner_.GetRegionToBeAdjusted(); } protected: enum class TransformationKind { kPeeling, kUnrolling, - kVersioning, }; // Applies a specific loop transformation to the loop. @@ -502,7 +461,7 @@ class LoopClonerHelper : public ValueObject { DISALLOW_COPY_AND_ASSIGN(LoopClonerHelper); }; -// Helper class to perform loop peeling/unrolling/versioning. +// Helper class to perform loop peeling/unrolling. // // This helper should be used when there is no need to get correspondence information between // original and copied basic blocks/instructions. @@ -512,7 +471,6 @@ class LoopClonerSimpleHelper : public ValueObject { bool IsLoopClonable() const { return helper_.IsLoopClonable(); } HBasicBlock* DoPeeling() { return helper_.DoPeeling(); } HBasicBlock* DoUnrolling() { return helper_.DoUnrolling(); } - HBasicBlock* DoVersioning() { return helper_.DoVersioning(); } HLoopInformation* GetRegionToBeAdjusted() const { return helper_.GetRegionToBeAdjusted(); } const SuperblockCloner::HBasicBlockMap* GetBasicBlockMap() const { return &bb_map_; } diff --git a/compiler/optimizing/superblock_cloner_test.cc b/compiler/optimizing/superblock_cloner_test.cc index 5190dae033..1bef8a4e9d 100644 --- a/compiler/optimizing/superblock_cloner_test.cc +++ b/compiler/optimizing/superblock_cloner_test.cc @@ -301,52 +301,6 @@ TEST_F(SuperblockClonerTest, LoopUnrolling) { EXPECT_EQ(loop_info->GetBackEdges()[0], bb_map.Get(loop_body)); } -// Tests SuperblockCloner for loop versioning case. -// -// See an ASCII graphics example near LoopClonerHelper::DoVersioning. 
-TEST_F(SuperblockClonerTest, LoopVersioning) { - HBasicBlock* return_block = InitGraphAndParameters(); - auto [preheader, header, loop_body] = CreateWhileLoop(return_block); - CreateBasicLoopDataFlow(header, loop_body); - graph_->BuildDominatorTree(); - EXPECT_TRUE(CheckGraph()); - - HBasicBlockMap bb_map( - std::less<HBasicBlock*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); - HInstructionMap hir_map( - std::less<HInstruction*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); - - HLoopInformation* loop_info = header->GetLoopInformation(); - HBasicBlock* original_preheader = loop_info->GetPreHeader(); - LoopClonerHelper helper(loop_info, &bb_map, &hir_map, /* induction_range= */ nullptr); - EXPECT_TRUE(helper.IsLoopClonable()); - HBasicBlock* new_header = helper.DoVersioning(); - EXPECT_EQ(header, new_header); - - EXPECT_TRUE(CheckGraph()); - - HBasicBlock* second_header = bb_map.Get(header); - HBasicBlock* second_body = bb_map.Get(loop_body); - HLoopInformation* second_loop_info = second_header->GetLoopInformation(); - - // Check loop body successors. - EXPECT_EQ(loop_body->GetSingleSuccessor(), header); - EXPECT_EQ(second_body->GetSingleSuccessor(), second_header); - - // Check loop structure. - EXPECT_EQ(loop_info, header->GetLoopInformation()); - EXPECT_EQ(loop_info->GetHeader(), header); - EXPECT_EQ(second_loop_info->GetHeader(), second_header); - - EXPECT_EQ(loop_info->GetBackEdges().size(), 1u); - EXPECT_EQ(second_loop_info->GetBackEdges().size(), 1u); - - EXPECT_EQ(loop_info->GetBackEdges()[0], loop_body); - EXPECT_EQ(second_loop_info->GetBackEdges()[0], second_body); - - EXPECT_EQ(original_preheader->GetSuccessors().size(), 2u); -} - // Checks that loop unrolling works fine for a loop with multiple back edges. Tests that after // the transformation the loop has a single preheader. 
TEST_F(SuperblockClonerTest, LoopPeelingMultipleBackEdges) { diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 959e291dc9..6330cc5d62 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -736,59 +736,12 @@ void X86_64Assembler::subps(XmmRegister dst, XmmRegister src) { } void X86_64Assembler::vaddps(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) { - DCHECK(CpuHasAVXorAVX2FeatureFlag()); - bool is_twobyte_form = false; - uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; - if (!add_right.NeedsRex()) { - is_twobyte_form = true; - } else if (!add_left.NeedsRex()) { - return vaddps(dst, add_right, add_left); - } - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - X86_64ManagedRegister vvvv_reg = - X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister()); - ByteZero = EmitVexPrefixByteZero(is_twobyte_form); - if (is_twobyte_form) { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE); - } else { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), - /*X=*/ false, - add_right.NeedsRex(), - SET_VEX_M_0F); - ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE); - } - EmitUint8(ByteZero); - EmitUint8(ByteOne); - if (!is_twobyte_form) { - EmitUint8(ByteTwo); - } - EmitUint8(0x58); - EmitXmmRegisterOperand(dst.LowBits(), add_right); + EmitVecArithAndLogicalOperation( + dst, add_left, add_right, /*opcode=*/ 0x58, SET_VEX_PP_NONE, /*is_commutative=*/ true); } void X86_64Assembler::vsubps(XmmRegister dst, XmmRegister src1, XmmRegister src2) { - DCHECK(CpuHasAVXorAVX2FeatureFlag()); - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - bool is_twobyte_form = false; - uint8_t byte_zero = 0x00, byte_one = 0x00, byte_two = 0x00; - if (!src2.NeedsRex()) { - is_twobyte_form = true; - } - byte_zero = EmitVexPrefixByteZero(is_twobyte_form); - X86_64ManagedRegister vvvv_reg = X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister()); - if (is_twobyte_form) { - byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE); - } else { - byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), /*X=*/ false, src2.NeedsRex(), SET_VEX_M_0F); - byte_two = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE); - } - EmitUint8(byte_zero); - EmitUint8(byte_one); - if (!is_twobyte_form) { - EmitUint8(byte_two); - } - EmitUint8(0x5C); - EmitXmmRegisterOperand(dst.LowBits(), src2); + EmitVecArithAndLogicalOperation(dst, src1, src2, /*opcode=*/ 0x5C, SET_VEX_PP_NONE); } @@ -801,34 +754,8 @@ void X86_64Assembler::mulps(XmmRegister dst, XmmRegister src) { } void X86_64Assembler::vmulps(XmmRegister dst, XmmRegister src1, XmmRegister src2) { - DCHECK(CpuHasAVXorAVX2FeatureFlag()); - bool is_twobyte_form = false; - uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; - if (!src2.NeedsRex()) { - is_twobyte_form = true; - } else if (!src1.NeedsRex()) { - return vmulps(dst, src2, src1); - } - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - ByteZero = EmitVexPrefixByteZero(is_twobyte_form); - X86_64ManagedRegister vvvv_reg = - X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister()); - if (is_twobyte_form) { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE); - } else { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), - /*X=*/ false, - src2.NeedsRex(), - SET_VEX_M_0F); - ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, 
SET_VEX_L_128, SET_VEX_PP_NONE); - } - EmitUint8(ByteZero); - EmitUint8(ByteOne); - if (!is_twobyte_form) { - EmitUint8(ByteTwo); - } - EmitUint8(0x59); - EmitXmmRegisterOperand(dst.LowBits(), src2); + EmitVecArithAndLogicalOperation( + dst, src1, src2, /*opcode=*/ 0x59, SET_VEX_PP_NONE, /*is_commutative=*/ true); } void X86_64Assembler::divps(XmmRegister dst, XmmRegister src) { @@ -840,32 +767,7 @@ void X86_64Assembler::divps(XmmRegister dst, XmmRegister src) { } void X86_64Assembler::vdivps(XmmRegister dst, XmmRegister src1, XmmRegister src2) { - DCHECK(CpuHasAVXorAVX2FeatureFlag()); - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - bool is_twobyte_form = false; - uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; - if (!src2.NeedsRex()) { - is_twobyte_form = true; - } - ByteZero = EmitVexPrefixByteZero(is_twobyte_form); - X86_64ManagedRegister vvvv_reg = - X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister()); - if (is_twobyte_form) { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE); - } else { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), - /*X=*/ false, - src2.NeedsRex(), - SET_VEX_M_0F); - ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE); - } - EmitUint8(ByteZero); - EmitUint8(ByteOne); - if (!is_twobyte_form) { - EmitUint8(ByteTwo); - } - EmitUint8(0x5E); - EmitXmmRegisterOperand(dst.LowBits(), src2); + EmitVecArithAndLogicalOperation(dst, src1, src2, /*opcode=*/ 0x5E, SET_VEX_PP_NONE); } void X86_64Assembler::vfmadd213ss(XmmRegister acc, XmmRegister left, XmmRegister right) { @@ -1209,33 +1111,8 @@ void X86_64Assembler::addpd(XmmRegister dst, XmmRegister src) { void X86_64Assembler::vaddpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) { - bool is_twobyte_form = false; - uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; - if (!add_right.NeedsRex()) { - is_twobyte_form = true; - } else if (!add_left.NeedsRex()) { - return vaddpd(dst, add_right, add_left); - } - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - ByteZero = EmitVexPrefixByteZero(is_twobyte_form); - X86_64ManagedRegister vvvv_reg = - X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister()); - if (is_twobyte_form) { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } else { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), - /*X=*/ false, - add_right.NeedsRex(), - SET_VEX_M_0F); - ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } - EmitUint8(ByteZero); - EmitUint8(ByteOne); - if (!is_twobyte_form) { - EmitUint8(ByteTwo); - } - EmitUint8(0x58); - EmitXmmRegisterOperand(dst.LowBits(), add_right); + EmitVecArithAndLogicalOperation( + dst, add_left, add_right, /*opcode=*/ 0x58, SET_VEX_PP_66, /*is_commutative=*/ true); } @@ -1250,31 +1127,7 @@ void X86_64Assembler::subpd(XmmRegister dst, XmmRegister src) { void X86_64Assembler::vsubpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) { - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - bool is_twobyte_form = false; - uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; - if (!src2.NeedsRex()) { - is_twobyte_form = true; - } - ByteZero = EmitVexPrefixByteZero(is_twobyte_form); - X86_64ManagedRegister vvvv_reg = - X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister()); - if (is_twobyte_form) { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } else { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), - 
/*X=*/ false, - src2.NeedsRex(), - SET_VEX_M_0F); - ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } - EmitUint8(ByteZero); - EmitUint8(ByteOne); - if (!is_twobyte_form) { - EmitUint8(ByteTwo); - } - EmitUint8(0x5C); - EmitXmmRegisterOperand(dst.LowBits(), src2); + EmitVecArithAndLogicalOperation(dst, src1, src2, /*opcode=*/ 0x5C, SET_VEX_PP_66); } @@ -1288,34 +1141,8 @@ void X86_64Assembler::mulpd(XmmRegister dst, XmmRegister src) { } void X86_64Assembler::vmulpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) { - DCHECK(CpuHasAVXorAVX2FeatureFlag()); - bool is_twobyte_form = false; - uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; - if (!src2.NeedsRex()) { - is_twobyte_form = true; - } else if (!src1.NeedsRex()) { - return vmulpd(dst, src2, src1); - } - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - ByteZero = EmitVexPrefixByteZero(is_twobyte_form); - X86_64ManagedRegister vvvv_reg = - X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister()); - if (is_twobyte_form) { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } else { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), - /*X=*/ false, - src2.NeedsRex(), - SET_VEX_M_0F); - ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } - EmitUint8(ByteZero); - EmitUint8(ByteOne); - if (!is_twobyte_form) { - EmitUint8(ByteTwo); - } - EmitUint8(0x59); - EmitXmmRegisterOperand(dst.LowBits(), src2); + EmitVecArithAndLogicalOperation( + dst, src1, src2, /*opcode=*/ 0x59, SET_VEX_PP_66, /*is_commutative=*/ true); } void X86_64Assembler::divpd(XmmRegister dst, XmmRegister src) { @@ -1329,32 +1156,7 @@ void X86_64Assembler::divpd(XmmRegister dst, XmmRegister src) { void X86_64Assembler::vdivpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) { - DCHECK(CpuHasAVXorAVX2FeatureFlag()); - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - bool is_twobyte_form = false; - uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; - if (!src2.NeedsRex()) { - is_twobyte_form = true; - } - ByteZero = EmitVexPrefixByteZero(is_twobyte_form); - X86_64ManagedRegister vvvv_reg = - X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister()); - if (is_twobyte_form) { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } else { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), - /*X=*/ false, - src2.NeedsRex(), - SET_VEX_M_0F); - ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } - EmitUint8(ByteZero); - EmitUint8(ByteOne); - if (!is_twobyte_form) { - EmitUint8(ByteTwo); - } - EmitUint8(0x5E); - EmitXmmRegisterOperand(dst.LowBits(), src2); + EmitVecArithAndLogicalOperation(dst, src1, src2, /*opcode=*/ 0x5E, SET_VEX_PP_66); } @@ -1531,34 +1333,8 @@ void X86_64Assembler::paddb(XmmRegister dst, XmmRegister src) { void X86_64Assembler::vpaddb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) { - DCHECK(CpuHasAVXorAVX2FeatureFlag()); - uint8_t ByteOne = 0x00, ByteZero = 0x00, ByteTwo = 0x00; - bool is_twobyte_form = false; - if (!add_right.NeedsRex()) { - is_twobyte_form = true; - } else if (!add_left.NeedsRex()) { - return vpaddb(dst, add_right, add_left); - } - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - ByteZero = EmitVexPrefixByteZero(is_twobyte_form); - X86_64ManagedRegister vvvv_reg = - X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister()); - if (is_twobyte_form) { - ByteOne = 
EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } else { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), - /*X=*/ false, - add_right.NeedsRex(), - SET_VEX_M_0F); - ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } - EmitUint8(ByteZero); - EmitUint8(ByteOne); - if (!is_twobyte_form) { - EmitUint8(ByteTwo); - } - EmitUint8(0xFC); - EmitXmmRegisterOperand(dst.LowBits(), add_right); + EmitVecArithAndLogicalOperation( + dst, add_left, add_right, /*opcode=*/ 0xFC, SET_VEX_PP_66, /*is_commutative=*/ true); } @@ -1573,32 +1349,7 @@ void X86_64Assembler::psubb(XmmRegister dst, XmmRegister src) { void X86_64Assembler::vpsubb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) { - DCHECK(CpuHasAVXorAVX2FeatureFlag()); - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - bool is_twobyte_form = false; - uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; - if (!add_right.NeedsRex()) { - is_twobyte_form = true; - } - ByteZero = EmitVexPrefixByteZero(is_twobyte_form); - X86_64ManagedRegister vvvv_reg = - X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister()); - if (is_twobyte_form) { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } else { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), - /*X=*/ false, - add_right.NeedsRex(), - SET_VEX_M_0F); - ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } - EmitUint8(ByteZero); - EmitUint8(ByteOne); - if (!is_twobyte_form) { - EmitUint8(ByteTwo); - } - EmitUint8(0xF8); - EmitXmmRegisterOperand(dst.LowBits(), add_right); + EmitVecArithAndLogicalOperation(dst, add_left, add_right, /*opcode=*/ 0xF8, SET_VEX_PP_66); } @@ -1612,34 +1363,8 @@ void X86_64Assembler::paddw(XmmRegister dst, XmmRegister src) { } void X86_64Assembler::vpaddw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) { - DCHECK(CpuHasAVXorAVX2FeatureFlag()); - bool is_twobyte_form = false; - uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; - if (!add_right.NeedsRex()) { - is_twobyte_form = true; - } else if (!add_left.NeedsRex()) { - return vpaddw(dst, add_right, add_left); - } - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - ByteZero = EmitVexPrefixByteZero(is_twobyte_form); - X86_64ManagedRegister vvvv_reg = - X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister()); - if (is_twobyte_form) { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } else { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), - /*X=*/ false, - add_right.NeedsRex(), - SET_VEX_M_0F); - ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } - EmitUint8(ByteZero); - EmitUint8(ByteOne); - if (!is_twobyte_form) { - EmitUint8(ByteTwo); - } - EmitUint8(0xFD); - EmitXmmRegisterOperand(dst.LowBits(), add_right); + EmitVecArithAndLogicalOperation( + dst, add_left, add_right, /*opcode=*/ 0xFD, SET_VEX_PP_66, /*is_commutative=*/ true); } @@ -1653,32 +1378,7 @@ void X86_64Assembler::psubw(XmmRegister dst, XmmRegister src) { } void X86_64Assembler::vpsubw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) { - DCHECK(CpuHasAVXorAVX2FeatureFlag()); - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - bool is_twobyte_form = false; - uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; - if (!add_right.NeedsRex()) { - is_twobyte_form = true; - } - ByteZero = EmitVexPrefixByteZero(is_twobyte_form); - X86_64ManagedRegister vvvv_reg = - 
X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister()); - if (is_twobyte_form) { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } else { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), - /*X=*/ false, - add_right.NeedsRex(), - SET_VEX_M_0F); - ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } - EmitUint8(ByteZero); - EmitUint8(ByteOne); - if (!is_twobyte_form) { - EmitUint8(ByteTwo); - } - EmitUint8(0xF9); - EmitXmmRegisterOperand(dst.LowBits(), add_right); + EmitVecArithAndLogicalOperation(dst, add_left, add_right, /*opcode=*/ 0xF9, SET_VEX_PP_66); } @@ -1692,34 +1392,8 @@ void X86_64Assembler::pmullw(XmmRegister dst, XmmRegister src) { } void X86_64Assembler::vpmullw(XmmRegister dst, XmmRegister src1, XmmRegister src2) { - DCHECK(CpuHasAVXorAVX2FeatureFlag()); - bool is_twobyte_form = false; - uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; - if (!src2.NeedsRex()) { - is_twobyte_form = true; - } else if (!src1.NeedsRex()) { - return vpmullw(dst, src2, src1); - } - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - ByteZero = EmitVexPrefixByteZero(is_twobyte_form); - X86_64ManagedRegister vvvv_reg = - X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister()); - if (is_twobyte_form) { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } else { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), - /*X=*/ false, - src2.NeedsRex(), - SET_VEX_M_0F); - ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } - EmitUint8(ByteZero); - EmitUint8(ByteOne); - if (!is_twobyte_form) { - EmitUint8(ByteTwo); - } - EmitUint8(0xD5); - EmitXmmRegisterOperand(dst.LowBits(), src2); + EmitVecArithAndLogicalOperation( + dst, src1, src2, /*opcode=*/ 0xD5, SET_VEX_PP_66, /*is_commutative=*/ true); } void X86_64Assembler::paddd(XmmRegister dst, XmmRegister src) { @@ -1732,34 +1406,8 @@ void X86_64Assembler::paddd(XmmRegister dst, XmmRegister src) { } void X86_64Assembler::vpaddd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) { - DCHECK(CpuHasAVXorAVX2FeatureFlag()); - bool is_twobyte_form = false; - uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; - if (!add_right.NeedsRex()) { - is_twobyte_form = true; - } else if (!add_left.NeedsRex()) { - return vpaddd(dst, add_right, add_left); - } - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - ByteZero = EmitVexPrefixByteZero(is_twobyte_form); - X86_64ManagedRegister vvvv_reg = - X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister()); - if (is_twobyte_form) { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } else { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), - /*X=*/ false, - add_right.NeedsRex(), - SET_VEX_M_0F); - ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } - EmitUint8(ByteZero); - EmitUint8(ByteOne); - if (!is_twobyte_form) { - EmitUint8(ByteTwo); - } - EmitUint8(0xFE); - EmitXmmRegisterOperand(dst.LowBits(), add_right); + EmitVecArithAndLogicalOperation( + dst, add_left, add_right, /*opcode=*/ 0xFE, SET_VEX_PP_66, /*is_commutative=*/ true); } void X86_64Assembler::psubd(XmmRegister dst, XmmRegister src) { @@ -1812,34 +1460,8 @@ void X86_64Assembler::paddq(XmmRegister dst, XmmRegister src) { void X86_64Assembler::vpaddq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) { - DCHECK(CpuHasAVXorAVX2FeatureFlag()); - bool is_twobyte_form = false; - 
uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; - if (!add_right.NeedsRex()) { - is_twobyte_form = true; - } else if (!add_left.NeedsRex()) { - return vpaddq(dst, add_right, add_left); - } - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - ByteZero = EmitVexPrefixByteZero(is_twobyte_form); - X86_64ManagedRegister vvvv_reg = - X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister()); - if (is_twobyte_form) { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } else { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), - /*X=*/ false, - add_right.NeedsRex(), - SET_VEX_M_0F); - ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } - EmitUint8(ByteZero); - EmitUint8(ByteOne); - if (!is_twobyte_form) { - EmitUint8(ByteTwo); - } - EmitUint8(0xD4); - EmitXmmRegisterOperand(dst.LowBits(), add_right); + EmitVecArithAndLogicalOperation( + dst, add_left, add_right, /*opcode=*/ 0xD4, SET_VEX_PP_66, /*is_commutative=*/ true); } @@ -1853,32 +1475,7 @@ void X86_64Assembler::psubq(XmmRegister dst, XmmRegister src) { } void X86_64Assembler::vpsubq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) { - DCHECK(CpuHasAVXorAVX2FeatureFlag()); - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - bool is_twobyte_form = false; - uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; - if (!add_right.NeedsRex()) { - is_twobyte_form = true; - } - ByteZero = EmitVexPrefixByteZero(is_twobyte_form); - X86_64ManagedRegister vvvv_reg = - X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister()); - if (is_twobyte_form) { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } else { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), - /*X=*/ false, - add_right.NeedsRex(), - SET_VEX_M_0F); - ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } - EmitUint8(ByteZero); - EmitUint8(ByteOne); - if (!is_twobyte_form) { - EmitUint8(ByteTwo); - } - EmitUint8(0xFB); - EmitXmmRegisterOperand(dst.LowBits(), add_right); + EmitVecArithAndLogicalOperation(dst, add_left, add_right, /*opcode=*/ 0xFB, SET_VEX_PP_66); } @@ -1943,32 +1540,7 @@ void X86_64Assembler::psubsb(XmmRegister dst, XmmRegister src) { void X86_64Assembler::vpsubd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) { - DCHECK(CpuHasAVXorAVX2FeatureFlag()); - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - bool is_twobyte_form = false; - uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; - if (!add_right.NeedsRex()) { - is_twobyte_form = true; - } - ByteZero = EmitVexPrefixByteZero(is_twobyte_form); - X86_64ManagedRegister vvvv_reg = - X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister()); - if (is_twobyte_form) { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } else { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), - /*X=*/ false, - add_right.NeedsRex(), - SET_VEX_M_0F); - ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } - EmitUint8(ByteZero); - EmitUint8(ByteOne); - if (!is_twobyte_form) { - EmitUint8(ByteTwo); - } - EmitUint8(0xFA); - EmitXmmRegisterOperand(dst.LowBits(), add_right); + EmitVecArithAndLogicalOperation(dst, add_left, add_right, /*opcode=*/ 0xFA, SET_VEX_PP_66); } @@ -2349,98 +1921,20 @@ void X86_64Assembler::pxor(XmmRegister dst, XmmRegister src) { /* VEX.128.66.0F.WIG EF /r VPXOR xmm1, xmm2, xmm3/m128 */ void 
X86_64Assembler::vpxor(XmmRegister dst, XmmRegister src1, XmmRegister src2) { - DCHECK(CpuHasAVXorAVX2FeatureFlag()); - bool is_twobyte_form = false; - uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; - if (!src2.NeedsRex()) { - is_twobyte_form = true; - } else if (!src1.NeedsRex()) { - return vpxor(dst, src2, src1); - } - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - X86_64ManagedRegister vvvv_reg = - X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister()); - ByteZero = EmitVexPrefixByteZero(is_twobyte_form); - if (is_twobyte_form) { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } else { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), - /*X=*/ false, - src2.NeedsRex(), - SET_VEX_M_0F); - ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } - EmitUint8(ByteZero); - EmitUint8(ByteOne); - if (!is_twobyte_form) { - EmitUint8(ByteTwo); - } - EmitUint8(0xEF); - EmitXmmRegisterOperand(dst.LowBits(), src2); + EmitVecArithAndLogicalOperation( + dst, src1, src2, /*opcode=*/ 0xEF, SET_VEX_PP_66, /*is_commutative=*/ true); } /* VEX.128.0F.WIG 57 /r VXORPS xmm1,xmm2, xmm3/m128 */ void X86_64Assembler::vxorps(XmmRegister dst, XmmRegister src1, XmmRegister src2) { - DCHECK(CpuHasAVXorAVX2FeatureFlag()); - bool is_twobyte_form = false; - uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; - if (!src2.NeedsRex()) { - is_twobyte_form = true; - } else if (!src1.NeedsRex()) { - return vxorps(dst, src2, src1); - } - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - X86_64ManagedRegister vvvv_reg = - X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister()); - ByteZero = EmitVexPrefixByteZero(is_twobyte_form); - if (is_twobyte_form) { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE); - } else { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), - /*X=*/ false, - src2.NeedsRex(), - SET_VEX_M_0F); - ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE); - } - EmitUint8(ByteZero); - EmitUint8(ByteOne); - if (!is_twobyte_form) { - EmitUint8(ByteTwo); - } - EmitUint8(0x57); - EmitXmmRegisterOperand(dst.LowBits(), src2); + EmitVecArithAndLogicalOperation( + dst, src1, src2, /*opcode=*/ 0x57, SET_VEX_PP_NONE, /*is_commutative=*/ true); } /* VEX.128.66.0F.WIG 57 /r VXORPD xmm1,xmm2, xmm3/m128 */ void X86_64Assembler::vxorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) { - DCHECK(CpuHasAVXorAVX2FeatureFlag()); - bool is_twobyte_form = false; - uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; - if (!src2.NeedsRex()) { - is_twobyte_form = true; - } else if (!src1.NeedsRex()) { - return vxorpd(dst, src2, src1); - } - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - X86_64ManagedRegister vvvv_reg = - X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister()); - ByteZero = EmitVexPrefixByteZero(is_twobyte_form); - if (is_twobyte_form) { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } else { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), - /*X=*/ false, - src2.NeedsRex(), - SET_VEX_M_0F); - ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } - EmitUint8(ByteZero); - EmitUint8(ByteOne); - if (!is_twobyte_form) { - EmitUint8(ByteTwo); - } - EmitUint8(0x57); - EmitXmmRegisterOperand(dst.LowBits(), src2); + EmitVecArithAndLogicalOperation( + dst, src1, src2, /*opcode=*/ 0x57, SET_VEX_PP_66, /*is_commutative=*/ true); } void 
X86_64Assembler::andpd(XmmRegister dst, const Address& src) { @@ -2480,98 +1974,20 @@ void X86_64Assembler::pand(XmmRegister dst, XmmRegister src) { /* VEX.128.66.0F.WIG DB /r VPAND xmm1, xmm2, xmm3/m128 */ void X86_64Assembler::vpand(XmmRegister dst, XmmRegister src1, XmmRegister src2) { - DCHECK(CpuHasAVXorAVX2FeatureFlag()); - bool is_twobyte_form = false; - uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; - if (!src2.NeedsRex()) { - is_twobyte_form = true; - } else if (!src1.NeedsRex()) { - return vpand(dst, src2, src1); - } - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - X86_64ManagedRegister vvvv_reg = - X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister()); - ByteZero = EmitVexPrefixByteZero(is_twobyte_form); - if (is_twobyte_form) { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } else { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), - /*X=*/ false, - src2.NeedsRex(), - SET_VEX_M_0F); - ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } - EmitUint8(ByteZero); - EmitUint8(ByteOne); - if (!is_twobyte_form) { - EmitUint8(ByteTwo); - } - EmitUint8(0xDB); - EmitXmmRegisterOperand(dst.LowBits(), src2); + EmitVecArithAndLogicalOperation( + dst, src1, src2, /*opcode=*/ 0xDB, SET_VEX_PP_66, /*is_commutative=*/ true); } /* VEX.128.0F 54 /r VANDPS xmm1,xmm2, xmm3/m128 */ void X86_64Assembler::vandps(XmmRegister dst, XmmRegister src1, XmmRegister src2) { - DCHECK(CpuHasAVXorAVX2FeatureFlag()); - bool is_twobyte_form = false; - uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; - if (!src2.NeedsRex()) { - is_twobyte_form = true; - } else if (!src1.NeedsRex()) { - return vandps(dst, src2, src1); - } - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - X86_64ManagedRegister vvvv_reg = - X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister()); - ByteZero = EmitVexPrefixByteZero(is_twobyte_form); - if (is_twobyte_form) { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE); - } else { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), - /*X=*/ false, - src2.NeedsRex(), - SET_VEX_M_0F); - ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE); - } - EmitUint8(ByteZero); - EmitUint8(ByteOne); - if (!is_twobyte_form) { - EmitUint8(ByteTwo); - } - EmitUint8(0x54); - EmitXmmRegisterOperand(dst.LowBits(), src2); + EmitVecArithAndLogicalOperation( + dst, src1, src2, /*opcode=*/ 0x54, SET_VEX_PP_NONE, /*is_commutative=*/ true); } /* VEX.128.66.0F 54 /r VANDPD xmm1, xmm2, xmm3/m128 */ void X86_64Assembler::vandpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) { - DCHECK(CpuHasAVXorAVX2FeatureFlag()); - bool is_twobyte_form = false; - uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; - if (!src2.NeedsRex()) { - is_twobyte_form = true; - } else if (!src1.NeedsRex()) { - return vandpd(dst, src2, src1); - } - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - X86_64ManagedRegister vvvv_reg = - X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister()); - ByteZero = EmitVexPrefixByteZero(is_twobyte_form); - if (is_twobyte_form) { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } else { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), - /*X=*/ false, - src2.NeedsRex(), - SET_VEX_M_0F); - ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } - EmitUint8(ByteZero); - EmitUint8(ByteOne); - if (!is_twobyte_form) { - 
EmitUint8(ByteTwo); - } - EmitUint8(0x54); - EmitXmmRegisterOperand(dst.LowBits(), src2); + EmitVecArithAndLogicalOperation( + dst, src1, src2, /*opcode=*/ 0x54, SET_VEX_PP_66, /*is_commutative=*/ true); } void X86_64Assembler::andn(CpuRegister dst, CpuRegister src1, CpuRegister src2) { @@ -2621,92 +2037,17 @@ void X86_64Assembler::pandn(XmmRegister dst, XmmRegister src) { /* VEX.128.66.0F.WIG DF /r VPANDN xmm1, xmm2, xmm3/m128 */ void X86_64Assembler::vpandn(XmmRegister dst, XmmRegister src1, XmmRegister src2) { - DCHECK(CpuHasAVXorAVX2FeatureFlag()); - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - bool is_twobyte_form = false; - uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; - if (!src2.NeedsRex()) { - is_twobyte_form = true; - } - X86_64ManagedRegister vvvv_reg = - X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister()); - ByteZero = EmitVexPrefixByteZero(is_twobyte_form); - if (is_twobyte_form) { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } else { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), - /*X=*/ false, - src2.NeedsRex(), - SET_VEX_M_0F); - ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } - EmitUint8(ByteZero); - EmitUint8(ByteOne); - if (!is_twobyte_form) { - EmitUint8(ByteTwo); - } - EmitUint8(0xDF); - EmitXmmRegisterOperand(dst.LowBits(), src2); + EmitVecArithAndLogicalOperation(dst, src1, src2, /*opcode=*/ 0xDF, SET_VEX_PP_66); } /* VEX.128.0F 55 /r VANDNPS xmm1, xmm2, xmm3/m128 */ void X86_64Assembler::vandnps(XmmRegister dst, XmmRegister src1, XmmRegister src2) { - DCHECK(CpuHasAVXorAVX2FeatureFlag()); - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - bool is_twobyte_form = false; - uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; - if (!src2.NeedsRex()) { - is_twobyte_form = true; - } - X86_64ManagedRegister vvvv_reg = - X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister()); - ByteZero = EmitVexPrefixByteZero(is_twobyte_form); - if (is_twobyte_form) { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE); - } else { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), - /*X=*/ false, - src2.NeedsRex(), - SET_VEX_M_0F); - ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE); - } - EmitUint8(ByteZero); - EmitUint8(ByteOne); - if (!is_twobyte_form) { - EmitUint8(ByteTwo); - } - EmitUint8(0x55); - EmitXmmRegisterOperand(dst.LowBits(), src2); + EmitVecArithAndLogicalOperation(dst, src1, src2, /*opcode=*/ 0x55, SET_VEX_PP_NONE); } /* VEX.128.66.0F 55 /r VANDNPD xmm1, xmm2, xmm3/m128 */ void X86_64Assembler::vandnpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) { - DCHECK(CpuHasAVXorAVX2FeatureFlag()); - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - bool is_twobyte_form = false; - uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; - if (!src2.NeedsRex()) { - is_twobyte_form = true; - } - X86_64ManagedRegister vvvv_reg = - X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister()); - ByteZero = EmitVexPrefixByteZero(is_twobyte_form); - if (is_twobyte_form) { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } else { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), - /*X=*/ false, - src2.NeedsRex(), - SET_VEX_M_0F); - ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } - EmitUint8(ByteZero); - EmitUint8(ByteOne); - if (!is_twobyte_form) { - EmitUint8(ByteTwo); - } - 
EmitUint8(0x55); - EmitXmmRegisterOperand(dst.LowBits(), src2); + EmitVecArithAndLogicalOperation(dst, src1, src2, /*opcode=*/ 0x55, SET_VEX_PP_66); } void X86_64Assembler::orpd(XmmRegister dst, XmmRegister src) { @@ -2737,98 +2078,20 @@ void X86_64Assembler::por(XmmRegister dst, XmmRegister src) { /* VEX.128.66.0F.WIG EB /r VPOR xmm1, xmm2, xmm3/m128 */ void X86_64Assembler::vpor(XmmRegister dst, XmmRegister src1, XmmRegister src2) { - DCHECK(CpuHasAVXorAVX2FeatureFlag()); - bool is_twobyte_form = false; - uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; - if (!src2.NeedsRex()) { - is_twobyte_form = true; - } else if (!src1.NeedsRex()) { - return vpor(dst, src2, src1); - } - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - X86_64ManagedRegister vvvv_reg = - X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister()); - ByteZero = EmitVexPrefixByteZero(is_twobyte_form); - if (is_twobyte_form) { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } else { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), - /*X=*/ false, - src2.NeedsRex(), - SET_VEX_M_0F); - ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } - EmitUint8(ByteZero); - EmitUint8(ByteOne); - if (!is_twobyte_form) { - EmitUint8(ByteTwo); - } - EmitUint8(0xEB); - EmitXmmRegisterOperand(dst.LowBits(), src2); + EmitVecArithAndLogicalOperation( + dst, src1, src2, /*opcode=*/ 0xEB, SET_VEX_PP_66, /*is_commutative=*/ true); } /* VEX.128.0F 56 /r VORPS xmm1,xmm2, xmm3/m128 */ void X86_64Assembler::vorps(XmmRegister dst, XmmRegister src1, XmmRegister src2) { - DCHECK(CpuHasAVXorAVX2FeatureFlag()); - bool is_twobyte_form = false; - uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; - if (!src2.NeedsRex()) { - is_twobyte_form = true; - } else if (!src1.NeedsRex()) { - return vorps(dst, src2, src1); - } - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - X86_64ManagedRegister vvvv_reg = - X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister()); - ByteZero = EmitVexPrefixByteZero(is_twobyte_form); - if (is_twobyte_form) { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE); - } else { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), - /*X=*/ false, - src2.NeedsRex(), - SET_VEX_M_0F); - ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE); - } - EmitUint8(ByteZero); - EmitUint8(ByteOne); - if (!is_twobyte_form) { - EmitUint8(ByteTwo); - } - EmitUint8(0x56); - EmitXmmRegisterOperand(dst.LowBits(), src2); + EmitVecArithAndLogicalOperation( + dst, src1, src2, /*opcode=*/ 0x56, SET_VEX_PP_NONE, /*is_commutative=*/ true); } /* VEX.128.66.0F 56 /r VORPD xmm1,xmm2, xmm3/m128 */ void X86_64Assembler::vorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) { - DCHECK(CpuHasAVXorAVX2FeatureFlag()); - bool is_twobyte_form = false; - uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; - if (!src2.NeedsRex()) { - is_twobyte_form = true; - } else if (!src1.NeedsRex()) { - return vorpd(dst, src2, src1); - } - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - X86_64ManagedRegister vvvv_reg = - X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister()); - ByteZero = EmitVexPrefixByteZero(is_twobyte_form); - if (is_twobyte_form) { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } else { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), - /*X=*/ false, - src2.NeedsRex(), - SET_VEX_M_0F); - ByteTwo = 
EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } - EmitUint8(ByteZero); - EmitUint8(ByteOne); - if (!is_twobyte_form) { - EmitUint8(ByteTwo); - } - EmitUint8(0x56); - EmitXmmRegisterOperand(dst.LowBits(), src2); + EmitVecArithAndLogicalOperation( + dst, src1, src2, /*opcode=*/ 0x56, SET_VEX_PP_66, /*is_commutative=*/ true); } void X86_64Assembler::pavgb(XmmRegister dst, XmmRegister src) { @@ -2868,34 +2131,8 @@ void X86_64Assembler::pmaddwd(XmmRegister dst, XmmRegister src) { } void X86_64Assembler::vpmaddwd(XmmRegister dst, XmmRegister src1, XmmRegister src2) { - DCHECK(CpuHasAVXorAVX2FeatureFlag()); - bool is_twobyte_form = false; - uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; - if (!src2.NeedsRex()) { - is_twobyte_form = true; - } else if (!src1.NeedsRex()) { - return vpmaddwd(dst, src2, src1); - } - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - ByteZero = EmitVexPrefixByteZero(is_twobyte_form); - X86_64ManagedRegister vvvv_reg = - X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister()); - if (is_twobyte_form) { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } else { - ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), - /*X=*/ false, - src2.NeedsRex(), - SET_VEX_M_0F); - ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); - } - EmitUint8(ByteZero); - EmitUint8(ByteOne); - if (!is_twobyte_form) { - EmitUint8(ByteTwo); - } - EmitUint8(0xF5); - EmitXmmRegisterOperand(dst.LowBits(), src2); + EmitVecArithAndLogicalOperation( + dst, src1, src2, /*opcode=*/ 0xF5, SET_VEX_PP_66, /*is_commutative=*/ true); } void X86_64Assembler::phaddw(XmmRegister dst, XmmRegister src) { @@ -5529,5 +4766,35 @@ uint8_t X86_64Assembler::EmitVexPrefixByteTwo(bool W, return vex_prefix; } +void X86_64Assembler::EmitVecArithAndLogicalOperation(XmmRegister dst, + XmmRegister src1, + XmmRegister src2, + uint8_t opcode, + int vex_pp, + bool is_commutative) { + if (is_commutative && src2.NeedsRex() && !src1.NeedsRex()) { + return EmitVecArithAndLogicalOperation(dst, src2, src1, opcode, vex_pp, is_commutative); + } + DCHECK(CpuHasAVXorAVX2FeatureFlag()); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + X86_64ManagedRegister vvvv_reg = X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister()); + bool is_twobyte_form = !src2.NeedsRex(); + uint8_t byte_zero = EmitVexPrefixByteZero(is_twobyte_form); + uint8_t byte_one, byte_two; + if (is_twobyte_form) { + byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, vex_pp); + } else { + byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), /*X=*/ false, src2.NeedsRex(), SET_VEX_M_0F); + byte_two = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, vex_pp); + } + EmitUint8(byte_zero); + EmitUint8(byte_one); + if (!is_twobyte_form) { + EmitUint8(byte_two); + } + EmitUint8(opcode); + EmitXmmRegisterOperand(dst.LowBits(), src2); +} + } // namespace x86_64 } // namespace art diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 5f6d754f8d..2c3b3c44ee 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -1185,6 +1185,13 @@ class X86_64Assembler final : public Assembler { int SET_VEX_L, int SET_VEX_PP); + void EmitVecArithAndLogicalOperation(XmmRegister dst, + XmmRegister src1, + XmmRegister src2, + uint8_t opcode, + int vex_pp, + bool is_commutative = false); + // Helper function to emit a shorter variant of XCHG if at least 
one operand is RAX/EAX/AX. bool try_xchg_rax(CpuRegister dst, CpuRegister src, |