summaryrefslogtreecommitdiff
path: root/compiler/utils
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/utils')
-rw-r--r--compiler/utils/swap_space.cc65
-rw-r--r--compiler/utils/swap_space.h22
-rw-r--r--compiler/utils/x86/assembler_x86.cc231
-rw-r--r--compiler/utils/x86/assembler_x86.h35
-rw-r--r--compiler/utils/x86/assembler_x86_test.cc101
-rw-r--r--compiler/utils/x86_64/assembler_x86_64.cc206
-rw-r--r--compiler/utils/x86_64/assembler_x86_64.h29
-rw-r--r--compiler/utils/x86_64/assembler_x86_64_test.cc100
8 files changed, 711 insertions, 78 deletions
diff --git a/compiler/utils/swap_space.cc b/compiler/utils/swap_space.cc
index 1a8f567aa1..a1eb08e041 100644
--- a/compiler/utils/swap_space.cc
+++ b/compiler/utils/swap_space.cc
@@ -36,17 +36,17 @@ template <typename FreeBySizeSet>
static void DumpFreeMap(const FreeBySizeSet& free_by_size) {
size_t last_size = static_cast<size_t>(-1);
for (const auto& entry : free_by_size) {
- if (last_size != entry.first) {
- last_size = entry.first;
+ if (last_size != entry.size) {
+ last_size = entry.size;
LOG(INFO) << "Size " << last_size;
}
- LOG(INFO) << " 0x" << std::hex << entry.second->Start()
- << " size=" << std::dec << entry.second->size;
+ LOG(INFO) << " 0x" << std::hex << entry.free_by_start_entry->Start()
+ << " size=" << std::dec << entry.free_by_start_entry->size;
}
}
void SwapSpace::RemoveChunk(FreeBySizeSet::const_iterator free_by_size_pos) {
- auto free_by_start_pos = free_by_size_pos->second;
+ auto free_by_start_pos = free_by_size_pos->free_by_start_entry;
free_by_size_.erase(free_by_size_pos);
free_by_start_.erase(free_by_start_pos);
}
@@ -89,7 +89,7 @@ static size_t CollectFree(const FreeByStartSet& free_by_start, const FreeBySizeS
// Calculate over free_by_size.
size_t sum1 = 0;
for (const auto& entry : free_by_size) {
- sum1 += entry.second->size;
+ sum1 += entry.free_by_start_entry->size;
}
// Calculate over free_by_start.
@@ -110,27 +110,52 @@ void* SwapSpace::Alloc(size_t size) {
// Check the free list for something that fits.
// TODO: Smarter implementation. Global biggest chunk, ...
- SpaceChunk old_chunk;
auto it = free_by_start_.empty()
? free_by_size_.end()
: free_by_size_.lower_bound(FreeBySizeEntry { size, free_by_start_.begin() });
if (it != free_by_size_.end()) {
- old_chunk = *it->second;
- RemoveChunk(it);
+ auto entry = it->free_by_start_entry;
+ SpaceChunk old_chunk = *entry;
+ if (old_chunk.size == size) {
+ RemoveChunk(it);
+ } else {
+ // Try to avoid deallocating and allocating the std::set<> nodes.
+ // This would be much simpler if we could use replace() from Boost.Bimap.
+
+ // The free_by_start_ map contains disjoint intervals ordered by the `ptr`.
+ // Shrinking the interval does not affect the ordering.
+ it->free_by_start_entry->ptr += size;
+ it->free_by_start_entry->size -= size;
+
+ // The free_by_size_ map is ordered by the `size` and then `free_by_start_entry->ptr`.
+ // Adjusting the `ptr` above does not change that ordering but decreasing `size` can
+ // push the node before the previous node(s).
+ if (it == free_by_size_.begin()) {
+ it->size -= size;
+ } else {
+ auto prev = it;
+ --prev;
+ FreeBySizeEntry new_value(old_chunk.size - size, entry);
+ if (free_by_size_.key_comp()(*prev, new_value)) {
+ it->size -= size;
+ } else {
+ // Changing in place would break the std::set<> ordering, we need to remove and insert.
+ free_by_size_.erase(it);
+ free_by_size_.insert(new_value);
+ }
+ }
+ }
+ return old_chunk.ptr;
} else {
// Not a big enough free chunk, need to increase file size.
- old_chunk = NewFileChunk(size);
- }
-
- void* ret = old_chunk.ptr;
-
- if (old_chunk.size != size) {
- // Insert the remainder.
- SpaceChunk new_chunk = { old_chunk.ptr + size, old_chunk.size - size };
- InsertChunk(new_chunk);
+ SpaceChunk new_chunk = NewFileChunk(size);
+ if (new_chunk.size != size) {
+ // Insert the remainder.
+ SpaceChunk remainder = { new_chunk.ptr + size, new_chunk.size - size };
+ InsertChunk(remainder);
+ }
+ return new_chunk.ptr;
}
-
- return ret;
}
SwapSpace::SpaceChunk SwapSpace::NewFileChunk(size_t min_size) {
diff --git a/compiler/utils/swap_space.h b/compiler/utils/swap_space.h
index 9600907278..c286b820fe 100644
--- a/compiler/utils/swap_space.h
+++ b/compiler/utils/swap_space.h
@@ -45,8 +45,10 @@ class SwapSpace {
private:
// Chunk of space.
struct SpaceChunk {
- uint8_t* ptr;
- size_t size;
+ // We need mutable members as we keep these objects in a std::set<> (providing only const
+ // access) but we modify these members while carefully preserving the std::set<> ordering.
+ mutable uint8_t* ptr;
+ mutable size_t size;
uintptr_t Start() const {
return reinterpret_cast<uintptr_t>(ptr);
@@ -66,13 +68,21 @@ class SwapSpace {
typedef std::set<SpaceChunk, SortChunkByPtr> FreeByStartSet;
// Map size to an iterator to free_by_start_'s entry.
- typedef std::pair<size_t, FreeByStartSet::const_iterator> FreeBySizeEntry;
+ struct FreeBySizeEntry {
+ FreeBySizeEntry(size_t sz, FreeByStartSet::const_iterator entry)
+ : size(sz), free_by_start_entry(entry) { }
+
+ // We need mutable members as we keep these objects in a std::set<> (providing only const
+ // access) but we modify these members while carefully preserving the std::set<> ordering.
+ mutable size_t size;
+ mutable FreeByStartSet::const_iterator free_by_start_entry;
+ };
struct FreeBySizeComparator {
bool operator()(const FreeBySizeEntry& lhs, const FreeBySizeEntry& rhs) {
- if (lhs.first != rhs.first) {
- return lhs.first < rhs.first;
+ if (lhs.size != rhs.size) {
+ return lhs.size < rhs.size;
} else {
- return lhs.second->Start() < rhs.second->Start();
+ return lhs.free_by_start_entry->Start() < rhs.free_by_start_entry->Start();
}
}
};
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 5a466e1d5d..6eab302dab 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -642,39 +642,6 @@ void X86Assembler::movhpd(const Address& dst, XmmRegister src) {
}
-void X86Assembler::psrldq(XmmRegister reg, const Immediate& shift_count) {
- DCHECK(shift_count.is_uint8());
-
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- EmitUint8(0x66);
- EmitUint8(0x0F);
- EmitUint8(0x73);
- EmitXmmRegisterOperand(3, reg);
- EmitUint8(shift_count.value());
-}
-
-
-void X86Assembler::psrlq(XmmRegister reg, const Immediate& shift_count) {
- DCHECK(shift_count.is_uint8());
-
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- EmitUint8(0x66);
- EmitUint8(0x0F);
- EmitUint8(0x73);
- EmitXmmRegisterOperand(2, reg);
- EmitUint8(shift_count.value());
-}
-
-
-void X86Assembler::punpckldq(XmmRegister dst, XmmRegister src) {
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- EmitUint8(0x66);
- EmitUint8(0x0F);
- EmitUint8(0x62);
- EmitXmmRegisterOperand(dst, src);
-}
-
-
void X86Assembler::addsd(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xF2);
@@ -828,6 +795,51 @@ void X86Assembler::movdqu(const Address& dst, XmmRegister src) {
}
+void X86Assembler::paddb(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xFC);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::psubb(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xF8);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::paddw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xFD);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::psubw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xF9);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::pmullw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xD5);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
void X86Assembler::paddd(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
@@ -856,6 +868,24 @@ void X86Assembler::pmulld(XmmRegister dst, XmmRegister src) {
}
+void X86Assembler::paddq(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xD4);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::psubq(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xFB);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
void X86Assembler::cvtsi2ss(XmmRegister dst, Register src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xF3);
@@ -1186,6 +1216,141 @@ void X86Assembler::pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm
}
+void X86Assembler::punpcklbw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x60);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::punpcklwd(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x61);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::punpckldq(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x62);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::punpcklqdq(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x6C);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::psllw(XmmRegister reg, const Immediate& shift_count) {
+ DCHECK(shift_count.is_uint8());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x71);
+ EmitXmmRegisterOperand(6, reg);
+ EmitUint8(shift_count.value());
+}
+
+
+void X86Assembler::pslld(XmmRegister reg, const Immediate& shift_count) {
+ DCHECK(shift_count.is_uint8());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x72);
+ EmitXmmRegisterOperand(6, reg);
+ EmitUint8(shift_count.value());
+}
+
+
+void X86Assembler::psllq(XmmRegister reg, const Immediate& shift_count) {
+ DCHECK(shift_count.is_uint8());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x73);
+ EmitXmmRegisterOperand(6, reg);
+ EmitUint8(shift_count.value());
+}
+
+
+void X86Assembler::psraw(XmmRegister reg, const Immediate& shift_count) {
+ DCHECK(shift_count.is_uint8());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x71);
+ EmitXmmRegisterOperand(4, reg);
+ EmitUint8(shift_count.value());
+}
+
+
+void X86Assembler::psrad(XmmRegister reg, const Immediate& shift_count) {
+ DCHECK(shift_count.is_uint8());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x72);
+ EmitXmmRegisterOperand(4, reg);
+ EmitUint8(shift_count.value());
+}
+
+
+void X86Assembler::psrlw(XmmRegister reg, const Immediate& shift_count) {
+ DCHECK(shift_count.is_uint8());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x71);
+ EmitXmmRegisterOperand(2, reg);
+ EmitUint8(shift_count.value());
+}
+
+
+void X86Assembler::psrld(XmmRegister reg, const Immediate& shift_count) {
+ DCHECK(shift_count.is_uint8());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x72);
+ EmitXmmRegisterOperand(2, reg);
+ EmitUint8(shift_count.value());
+}
+
+
+void X86Assembler::psrlq(XmmRegister reg, const Immediate& shift_count) {
+ DCHECK(shift_count.is_uint8());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x73);
+ EmitXmmRegisterOperand(2, reg);
+ EmitUint8(shift_count.value());
+}
+
+
+void X86Assembler::psrldq(XmmRegister reg, const Immediate& shift_count) {
+ DCHECK(shift_count.is_uint8());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x73);
+ EmitXmmRegisterOperand(3, reg);
+ EmitUint8(shift_count.value());
+}
+
+
void X86Assembler::fldl(const Address& src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xDD);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 4343e2e734..2999599fc5 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -408,14 +408,9 @@ class X86Assembler FINAL : public Assembler {
void movsd(const Address& dst, XmmRegister src);
void movsd(XmmRegister dst, XmmRegister src);
- void psrlq(XmmRegister reg, const Immediate& shift_count);
- void punpckldq(XmmRegister dst, XmmRegister src);
-
void movhpd(XmmRegister dst, const Address& src);
void movhpd(const Address& dst, XmmRegister src);
- void psrldq(XmmRegister reg, const Immediate& shift_count);
-
void addsd(XmmRegister dst, XmmRegister src);
void addsd(XmmRegister dst, const Address& src);
void subsd(XmmRegister dst, XmmRegister src);
@@ -436,10 +431,20 @@ class X86Assembler FINAL : public Assembler {
void movdqa(const Address& dst, XmmRegister src); // store aligned
void movdqu(const Address& dst, XmmRegister src); // store unaligned
- void paddd(XmmRegister dst, XmmRegister src); // no addr variant (for now)
+ void paddb(XmmRegister dst, XmmRegister src); // no addr variant (for now)
+ void psubb(XmmRegister dst, XmmRegister src);
+
+ void paddw(XmmRegister dst, XmmRegister src);
+ void psubw(XmmRegister dst, XmmRegister src);
+ void pmullw(XmmRegister dst, XmmRegister src);
+
+ void paddd(XmmRegister dst, XmmRegister src);
void psubd(XmmRegister dst, XmmRegister src);
void pmulld(XmmRegister dst, XmmRegister src);
+ void paddq(XmmRegister dst, XmmRegister src);
+ void psubq(XmmRegister dst, XmmRegister src);
+
void cvtsi2ss(XmmRegister dst, Register src);
void cvtsi2sd(XmmRegister dst, Register src);
@@ -489,6 +494,24 @@ class X86Assembler FINAL : public Assembler {
void shufps(XmmRegister dst, XmmRegister src, const Immediate& imm);
void pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm);
+ void punpcklbw(XmmRegister dst, XmmRegister src);
+ void punpcklwd(XmmRegister dst, XmmRegister src);
+ void punpckldq(XmmRegister dst, XmmRegister src);
+ void punpcklqdq(XmmRegister dst, XmmRegister src);
+
+ void psllw(XmmRegister reg, const Immediate& shift_count);
+ void pslld(XmmRegister reg, const Immediate& shift_count);
+ void psllq(XmmRegister reg, const Immediate& shift_count);
+
+ void psraw(XmmRegister reg, const Immediate& shift_count);
+ void psrad(XmmRegister reg, const Immediate& shift_count);
+ // no psraq
+
+ void psrlw(XmmRegister reg, const Immediate& shift_count);
+ void psrld(XmmRegister reg, const Immediate& shift_count);
+ void psrlq(XmmRegister reg, const Immediate& shift_count);
+ void psrldq(XmmRegister reg, const Immediate& shift_count);
+
void flds(const Address& src);
void fstps(const Address& dst);
void fsts(const Address& dst);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index c6ab893aea..a74bea207e 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -122,18 +122,6 @@ TEST_F(AssemblerX86Test, Movntl) {
DriverStr(expected, "movntl");
}
-TEST_F(AssemblerX86Test, psrlq) {
- GetAssembler()->psrlq(x86::XMM0, CreateImmediate(32));
- const char* expected = "psrlq $0x20, %xmm0\n";
- DriverStr(expected, "psrlq");
-}
-
-TEST_F(AssemblerX86Test, punpckldq) {
- GetAssembler()->punpckldq(x86::XMM0, x86::XMM1);
- const char* expected = "punpckldq %xmm1, %xmm0\n";
- DriverStr(expected, "punpckldq");
-}
-
TEST_F(AssemblerX86Test, LoadLongConstant) {
GetAssembler()->LoadLongConstant(x86::XMM0, 51);
const char* expected =
@@ -521,6 +509,26 @@ TEST_F(AssemblerX86Test, DivPD) {
DriverStr(RepeatFF(&x86::X86Assembler::divpd, "divpd %{reg2}, %{reg1}"), "divpd");
}
+TEST_F(AssemblerX86Test, PAddB) {
+ DriverStr(RepeatFF(&x86::X86Assembler::paddb, "paddb %{reg2}, %{reg1}"), "paddb");
+}
+
+TEST_F(AssemblerX86Test, PSubB) {
+ DriverStr(RepeatFF(&x86::X86Assembler::psubb, "psubb %{reg2}, %{reg1}"), "psubb");
+}
+
+TEST_F(AssemblerX86Test, PAddW) {
+ DriverStr(RepeatFF(&x86::X86Assembler::paddw, "paddw %{reg2}, %{reg1}"), "paddw");
+}
+
+TEST_F(AssemblerX86Test, PSubW) {
+ DriverStr(RepeatFF(&x86::X86Assembler::psubw, "psubw %{reg2}, %{reg1}"), "psubw");
+}
+
+TEST_F(AssemblerX86Test, PMullW) {
+ DriverStr(RepeatFF(&x86::X86Assembler::pmullw, "pmullw %{reg2}, %{reg1}"), "pmullw");
+}
+
TEST_F(AssemblerX86Test, PAddD) {
DriverStr(RepeatFF(&x86::X86Assembler::paddd, "paddd %{reg2}, %{reg1}"), "paddd");
}
@@ -533,6 +541,14 @@ TEST_F(AssemblerX86Test, PMullD) {
DriverStr(RepeatFF(&x86::X86Assembler::pmulld, "pmulld %{reg2}, %{reg1}"), "pmulld");
}
+TEST_F(AssemblerX86Test, PAddQ) {
+ DriverStr(RepeatFF(&x86::X86Assembler::paddq, "paddq %{reg2}, %{reg1}"), "paddq");
+}
+
+TEST_F(AssemblerX86Test, PSubQ) {
+ DriverStr(RepeatFF(&x86::X86Assembler::psubq, "psubq %{reg2}, %{reg1}"), "psubq");
+}
+
TEST_F(AssemblerX86Test, XorPD) {
DriverStr(RepeatFF(&x86::X86Assembler::xorpd, "xorpd %{reg2}, %{reg1}"), "xorpd");
}
@@ -581,6 +597,67 @@ TEST_F(AssemblerX86Test, PShufD) {
DriverStr(RepeatFFI(&x86::X86Assembler::pshufd, 1, "pshufd ${imm}, %{reg2}, %{reg1}"), "pshufd");
}
+TEST_F(AssemblerX86Test, Punpcklbw) {
+ DriverStr(RepeatFF(&x86::X86Assembler::punpcklbw, "punpcklbw %{reg2}, %{reg1}"), "punpcklbw");
+}
+
+TEST_F(AssemblerX86Test, Punpcklwd) {
+ DriverStr(RepeatFF(&x86::X86Assembler::punpcklwd, "punpcklwd %{reg2}, %{reg1}"), "punpcklwd");
+}
+
+TEST_F(AssemblerX86Test, Punpckldq) {
+ DriverStr(RepeatFF(&x86::X86Assembler::punpckldq, "punpckldq %{reg2}, %{reg1}"), "punpckldq");
+}
+
+TEST_F(AssemblerX86Test, Punpcklqdq) {
+ DriverStr(RepeatFF(&x86::X86Assembler::punpcklqdq, "punpcklqdq %{reg2}, %{reg1}"), "punpcklqdq");
+}
+
+TEST_F(AssemblerX86Test, psllw) {
+ GetAssembler()->psllw(x86::XMM0, CreateImmediate(16));
+ DriverStr("psllw $0x10, %xmm0\n", "psllwi");
+}
+
+TEST_F(AssemblerX86Test, pslld) {
+ GetAssembler()->pslld(x86::XMM0, CreateImmediate(16));
+ DriverStr("pslld $0x10, %xmm0\n", "pslldi");
+}
+
+TEST_F(AssemblerX86Test, psllq) {
+ GetAssembler()->psllq(x86::XMM0, CreateImmediate(16));
+ DriverStr("psllq $0x10, %xmm0\n", "psllqi");
+}
+
+TEST_F(AssemblerX86Test, psraw) {
+ GetAssembler()->psraw(x86::XMM0, CreateImmediate(16));
+ DriverStr("psraw $0x10, %xmm0\n", "psrawi");
+}
+
+TEST_F(AssemblerX86Test, psrad) {
+ GetAssembler()->psrad(x86::XMM0, CreateImmediate(16));
+ DriverStr("psrad $0x10, %xmm0\n", "psradi");
+}
+
+TEST_F(AssemblerX86Test, psrlw) {
+ GetAssembler()->psrlw(x86::XMM0, CreateImmediate(16));
+ DriverStr("psrlw $0x10, %xmm0\n", "psrlwi");
+}
+
+TEST_F(AssemblerX86Test, psrld) {
+ GetAssembler()->psrld(x86::XMM0, CreateImmediate(16));
+ DriverStr("psrld $0x10, %xmm0\n", "psrldi");
+}
+
+TEST_F(AssemblerX86Test, psrlq) {
+ GetAssembler()->psrlq(x86::XMM0, CreateImmediate(16));
+ DriverStr("psrlq $0x10, %xmm0\n", "psrlqi");
+}
+
+TEST_F(AssemblerX86Test, psrldq) {
+ GetAssembler()->psrldq(x86::XMM0, CreateImmediate(16));
+ DriverStr("psrldq $0x10, %xmm0\n", "psrldqi");
+}
+
/////////////////
// Near labels //
/////////////////
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index b41be80ae4..458204aca9 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -882,6 +882,56 @@ void X86_64Assembler::movdqu(const Address& dst, XmmRegister src) {
}
+void X86_64Assembler::paddb(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xFC);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
+void X86_64Assembler::psubb(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xF8);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
+void X86_64Assembler::paddw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xFD);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
+void X86_64Assembler::psubw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xF9);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
+void X86_64Assembler::pmullw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xD5);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
void X86_64Assembler::paddd(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
@@ -913,6 +963,26 @@ void X86_64Assembler::pmulld(XmmRegister dst, XmmRegister src) {
}
+void X86_64Assembler::paddq(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xD4);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
+void X86_64Assembler::psubq(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xFB);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src) {
cvtsi2ss(dst, src, false);
}
@@ -1354,6 +1424,142 @@ void X86_64Assembler::pshufd(XmmRegister dst, XmmRegister src, const Immediate&
}
+void X86_64Assembler::punpcklbw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x60);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
+void X86_64Assembler::punpcklwd(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x61);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
+void X86_64Assembler::punpckldq(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x62);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
+void X86_64Assembler::punpcklqdq(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x6C);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
+void X86_64Assembler::psllw(XmmRegister reg, const Immediate& shift_count) {
+ DCHECK(shift_count.is_uint8());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex(false, false, false, false, reg.NeedsRex());
+ EmitUint8(0x0F);
+ EmitUint8(0x71);
+ EmitXmmRegisterOperand(6, reg);
+ EmitUint8(shift_count.value());
+}
+
+
+void X86_64Assembler::pslld(XmmRegister reg, const Immediate& shift_count) {
+ DCHECK(shift_count.is_uint8());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex(false, false, false, false, reg.NeedsRex());
+ EmitUint8(0x0F);
+ EmitUint8(0x72);
+ EmitXmmRegisterOperand(6, reg);
+ EmitUint8(shift_count.value());
+}
+
+
+void X86_64Assembler::psllq(XmmRegister reg, const Immediate& shift_count) {
+ DCHECK(shift_count.is_uint8());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex(false, false, false, false, reg.NeedsRex());
+ EmitUint8(0x0F);
+ EmitUint8(0x73);
+ EmitXmmRegisterOperand(6, reg);
+ EmitUint8(shift_count.value());
+}
+
+
+void X86_64Assembler::psraw(XmmRegister reg, const Immediate& shift_count) {
+ DCHECK(shift_count.is_uint8());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex(false, false, false, false, reg.NeedsRex());
+ EmitUint8(0x0F);
+ EmitUint8(0x71);
+ EmitXmmRegisterOperand(4, reg);
+ EmitUint8(shift_count.value());
+}
+
+
+void X86_64Assembler::psrad(XmmRegister reg, const Immediate& shift_count) {
+ DCHECK(shift_count.is_uint8());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex(false, false, false, false, reg.NeedsRex());
+ EmitUint8(0x0F);
+ EmitUint8(0x72);
+ EmitXmmRegisterOperand(4, reg);
+ EmitUint8(shift_count.value());
+}
+
+
+void X86_64Assembler::psrlw(XmmRegister reg, const Immediate& shift_count) {
+ DCHECK(shift_count.is_uint8());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex(false, false, false, false, reg.NeedsRex());
+ EmitUint8(0x0F);
+ EmitUint8(0x71);
+ EmitXmmRegisterOperand(2, reg);
+ EmitUint8(shift_count.value());
+}
+
+
+void X86_64Assembler::psrld(XmmRegister reg, const Immediate& shift_count) {
+ DCHECK(shift_count.is_uint8());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex(false, false, false, false, reg.NeedsRex());
+ EmitUint8(0x0F);
+ EmitUint8(0x72);
+ EmitXmmRegisterOperand(2, reg);
+ EmitUint8(shift_count.value());
+}
+
+
+void X86_64Assembler::psrlq(XmmRegister reg, const Immediate& shift_count) {
+ DCHECK(shift_count.is_uint8());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex(false, false, false, false, reg.NeedsRex());
+ EmitUint8(0x0F);
+ EmitUint8(0x73);
+ EmitXmmRegisterOperand(2, reg);
+ EmitUint8(shift_count.value());
+}
+
+
void X86_64Assembler::fldl(const Address& src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xDD);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 43ea12a4cb..0dc11d840b 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -452,10 +452,20 @@ class X86_64Assembler FINAL : public Assembler {
void movdqa(const Address& dst, XmmRegister src); // store aligned
void movdqu(const Address& dst, XmmRegister src); // store unaligned
- void paddd(XmmRegister dst, XmmRegister src); // no addr variant (for now)
+ void paddb(XmmRegister dst, XmmRegister src); // no addr variant (for now)
+ void psubb(XmmRegister dst, XmmRegister src);
+
+ void paddw(XmmRegister dst, XmmRegister src);
+ void psubw(XmmRegister dst, XmmRegister src);
+ void pmullw(XmmRegister dst, XmmRegister src);
+
+ void paddd(XmmRegister dst, XmmRegister src);
void psubd(XmmRegister dst, XmmRegister src);
void pmulld(XmmRegister dst, XmmRegister src);
+ void paddq(XmmRegister dst, XmmRegister src);
+ void psubq(XmmRegister dst, XmmRegister src);
+
void cvtsi2ss(XmmRegister dst, CpuRegister src); // Note: this is the r/m32 version.
void cvtsi2ss(XmmRegister dst, CpuRegister src, bool is64bit);
void cvtsi2ss(XmmRegister dst, const Address& src, bool is64bit);
@@ -512,6 +522,23 @@ class X86_64Assembler FINAL : public Assembler {
void shufps(XmmRegister dst, XmmRegister src, const Immediate& imm);
void pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm);
+ void punpcklbw(XmmRegister dst, XmmRegister src);
+ void punpcklwd(XmmRegister dst, XmmRegister src);
+ void punpckldq(XmmRegister dst, XmmRegister src);
+ void punpcklqdq(XmmRegister dst, XmmRegister src);
+
+ void psllw(XmmRegister reg, const Immediate& shift_count);
+ void pslld(XmmRegister reg, const Immediate& shift_count);
+ void psllq(XmmRegister reg, const Immediate& shift_count);
+
+ void psraw(XmmRegister reg, const Immediate& shift_count);
+ void psrad(XmmRegister reg, const Immediate& shift_count);
+ // no psraq
+
+ void psrlw(XmmRegister reg, const Immediate& shift_count);
+ void psrld(XmmRegister reg, const Immediate& shift_count);
+ void psrlq(XmmRegister reg, const Immediate& shift_count);
+
void flds(const Address& src);
void fstps(const Address& dst);
void fsts(const Address& dst);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index aeb1911835..fe9449720f 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1128,6 +1128,26 @@ TEST_F(AssemblerX86_64Test, Divpd) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::divpd, "divpd %{reg2}, %{reg1}"), "divpd");
}
+TEST_F(AssemblerX86_64Test, Paddb) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddb, "paddb %{reg2}, %{reg1}"), "paddb");
+}
+
+TEST_F(AssemblerX86_64Test, Psubb) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubb, "psubb %{reg2}, %{reg1}"), "psubb");
+}
+
+TEST_F(AssemblerX86_64Test, Paddw) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddw, "paddw %{reg2}, %{reg1}"), "paddw");
+}
+
+TEST_F(AssemblerX86_64Test, Psubw) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubw, "psubw %{reg2}, %{reg1}"), "psubw");
+}
+
+TEST_F(AssemblerX86_64Test, Pmullw) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmullw, "pmullw %{reg2}, %{reg1}"), "pmullw");
+}
+
TEST_F(AssemblerX86_64Test, Paddd) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddd, "paddd %{reg2}, %{reg1}"), "paddd");
}
@@ -1140,6 +1160,14 @@ TEST_F(AssemblerX86_64Test, Pmulld) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmulld, "pmulld %{reg2}, %{reg1}"), "pmulld");
}
+TEST_F(AssemblerX86_64Test, Paddq) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddq, "paddq %{reg2}, %{reg1}"), "paddq");
+}
+
+TEST_F(AssemblerX86_64Test, Psubq) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubq, "psubq %{reg2}, %{reg1}"), "psubq");
+}
+
TEST_F(AssemblerX86_64Test, Cvtsi2ss) {
DriverStr(RepeatFr(&x86_64::X86_64Assembler::cvtsi2ss, "cvtsi2ss %{reg2}, %{reg1}"), "cvtsi2ss");
}
@@ -1261,6 +1289,78 @@ TEST_F(AssemblerX86_64Test, PShufd) {
DriverStr(RepeatFFI(&x86_64::X86_64Assembler::pshufd, 1, "pshufd ${imm}, %{reg2}, %{reg1}"), "pshufd");
}
+TEST_F(AssemblerX86_64Test, Punpcklbw) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpcklbw, "punpcklbw %{reg2}, %{reg1}"), "punpcklbw");
+}
+
+TEST_F(AssemblerX86_64Test, Punpcklwd) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpcklwd, "punpcklwd %{reg2}, %{reg1}"), "punpcklwd");
+}
+
+TEST_F(AssemblerX86_64Test, Punpckldq) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckldq, "punpckldq %{reg2}, %{reg1}"), "punpckldq");
+}
+
+TEST_F(AssemblerX86_64Test, Punpcklqdq) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpcklqdq, "punpcklqdq %{reg2}, %{reg1}"), "punpcklqdq");
+}
+
+TEST_F(AssemblerX86_64Test, Psllw) {
+ GetAssembler()->psllw(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1));
+ GetAssembler()->psllw(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2));
+ DriverStr("psllw $1, %xmm0\n"
+ "psllw $2, %xmm15\n", "psllwi");
+}
+
+TEST_F(AssemblerX86_64Test, Pslld) {
+ GetAssembler()->pslld(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1));
+ GetAssembler()->pslld(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2));
+ DriverStr("pslld $1, %xmm0\n"
+ "pslld $2, %xmm15\n", "pslldi");
+}
+
+TEST_F(AssemblerX86_64Test, Psllq) {
+ GetAssembler()->psllq(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1));
+ GetAssembler()->psllq(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2));
+ DriverStr("psllq $1, %xmm0\n"
+ "psllq $2, %xmm15\n", "psllqi");
+}
+
+TEST_F(AssemblerX86_64Test, Psraw) {
+ GetAssembler()->psraw(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1));
+ GetAssembler()->psraw(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2));
+ DriverStr("psraw $1, %xmm0\n"
+ "psraw $2, %xmm15\n", "psrawi");
+}
+
+TEST_F(AssemblerX86_64Test, Psrad) {
+ GetAssembler()->psrad(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1));
+ GetAssembler()->psrad(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2));
+ DriverStr("psrad $1, %xmm0\n"
+ "psrad $2, %xmm15\n", "psradi");
+}
+
+TEST_F(AssemblerX86_64Test, Psrlw) {
+ GetAssembler()->psrlw(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1));
+ GetAssembler()->psrlw(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2));
+ DriverStr("psrlw $1, %xmm0\n"
+ "psrlw $2, %xmm15\n", "psrlwi");
+}
+
+TEST_F(AssemblerX86_64Test, Psrld) {
+ GetAssembler()->psrld(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1));
+ GetAssembler()->psrld(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2));
+ DriverStr("psrld $1, %xmm0\n"
+ "psrld $2, %xmm15\n", "pslldi");
+}
+
+TEST_F(AssemblerX86_64Test, Psrlq) {
+ GetAssembler()->psrlq(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1));
+ GetAssembler()->psrlq(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2));
+ DriverStr("psrlq $1, %xmm0\n"
+ "psrlq $2, %xmm15\n", "pslrqi");
+}
+
TEST_F(AssemblerX86_64Test, UcomissAddress) {
GetAssembler()->ucomiss(x86_64::XmmRegister(x86_64::XMM0), x86_64::Address(
x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));