Optimizing: Add Non Temporal Move support for x86
Add non-temporal moves, i.e. stores that don't pollute the data cache.
These can be used when writing to large data structures.
Change-Id: I14d91ba6264f5ce2f128033d65d59b2536426643
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 44efc65..e01d476 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -145,6 +145,13 @@
EmitLabel(lbl, dst.length_ + 5);
}
+void X86Assembler::movntl(const Address& dst, Register src) {  // Non-temporal store of src to dst.
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);  // Presumably reserves buffer_ space first.
+ EmitUint8(0x0F);  // First opcode byte.
+ EmitUint8(0xC3);  // Second opcode byte; 0F C3 is the MOVNTI opcode (test expects "movntil").
+ EmitOperand(src, dst);  // Operand bytes for register src and memory operand dst.
+}
+
void X86Assembler::bswapl(Register dst) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x0F);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index e2abcde..07958eb 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -231,6 +231,8 @@
void movl(const Address& dst, const Immediate& imm);
void movl(const Address& dst, Label* lbl);
+ void movntl(const Address& dst, Register src);  // Non-temporal store of src to dst.
+
void bswapl(Register dst);
void movzxb(Register dst, ByteRegister src);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index 0e8c4ae..f778608 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -105,6 +105,16 @@
DriverStr(expected, "movl");
}
+TEST_F(AssemblerX86Test, Movntl) {  // movntl with an indexed and a base-only address.
+ GetAssembler()->movntl(x86::Address(x86::EDI, x86::EBX, x86::TIMES_4, 12), x86::EAX);
+ GetAssembler()->movntl(x86::Address(x86::EDI, 0), x86::EAX);
+ const char* expected =  // Expected AT&T-syntax text, one line per call above.
+ "movntil %EAX, 0xc(%EDI,%EBX,4)\n"
+ "movntil %EAX, (%EDI)\n";
+ // DriverStr is defined elsewhere; presumably it checks the emitted code against `expected`.
+ DriverStr(expected, "movntl");
+}
+
TEST_F(AssemblerX86Test, psrlq) {
GetAssembler()->psrlq(x86::XMM0, CreateImmediate(32));
const char* expected = "psrlq $0x20, %xmm0\n";