x86/fpu/math-emu: Add support for FISTTP instructions

The FISTTP instructions are x87 FPU instructions that were introduced
together with SSE3, so SSE3-capable CPUs support them.

Run-tested by booting with "no387 nofxsr" and running a test
program:

	[RUN]	Testing fisttp instructions
	[OK]	fisttp

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Shuah Khan <shuahkh@osg.samsung.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/r/1442600614-28428-1-git-send-email-dvlasenk@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
diff --git a/arch/x86/math-emu/load_store.c b/arch/x86/math-emu/load_store.c
index 2931ff3..95228ff 100644
--- a/arch/x86/math-emu/load_store.c
+++ b/arch/x86/math-emu/load_store.c
@@ -33,11 +33,12 @@
 
 #define pop_0()	{ FPU_settag0(TAG_Empty); top++; }
 
+/* index is a 5-bit value: (3-bit FPU_modrm.reg field | opcode[2,1]) */
 static u_char const type_table[32] = {
-	_PUSH_, _PUSH_, _PUSH_, _PUSH_,
-	_null_, _null_, _null_, _null_,
-	_REG0_, _REG0_, _REG0_, _REG0_,
-	_REG0_, _REG0_, _REG0_, _REG0_,
+	_PUSH_, _PUSH_, _PUSH_, _PUSH_, /* /0: d9:fld f32,  db:fild m32,  dd:fld f64,  df:fild m16 */
+	_null_, _REG0_, _REG0_, _REG0_, /* /1: d9:undef,    db,dd,df:fisttp m32/64/16 */
+	_REG0_, _REG0_, _REG0_, _REG0_, /* /2: d9:fst f32,  db:fist m32,  dd:fst f64,  df:fist m16 */
+	_REG0_, _REG0_, _REG0_, _REG0_, /* /3: d9:fstp f32, db:fistp m32, dd:fstp f64, df:fistp m16 */
 	_NONE_, _null_, _NONE_, _PUSH_,
 	_NONE_, _PUSH_, _null_, _PUSH_,
 	_NONE_, _null_, _NONE_, _REG0_,
@@ -45,15 +46,19 @@
 };
 
 u_char const data_sizes_16[32] = {
-	4, 4, 8, 2, 0, 0, 0, 0,
-	4, 4, 8, 2, 4, 4, 8, 2,
+	4, 4, 8, 2, /* /0: d9:fld f32, db:fild m32, dd:fld f64, df:fild m16 */
+	0, 4, 8, 2, /* /1: d9:undef, db,dd,df:fisttp m32/64/16 */
+	4, 4, 8, 2, /* /2: d9:fst f32, db:fist m32, dd:fst f64, df:fist m16 */
+	4, 4, 8, 2, /* /3: d9:fstp f32, db:fistp m32, dd:fstp f64, df:fistp m16 */
 	14, 0, 94, 10, 2, 10, 0, 8,
 	14, 0, 94, 10, 2, 10, 2, 8
 };
 
 static u_char const data_sizes_32[32] = {
-	4, 4, 8, 2, 0, 0, 0, 0,
-	4, 4, 8, 2, 4, 4, 8, 2,
+	4, 4, 8, 2, /* /0: d9:fld f32, db:fild m32, dd:fld f64, df:fild m16 */
+	0, 4, 8, 2, /* /1: d9:undef, db,dd,df:fisttp m32/64/16 */
+	4, 4, 8, 2, /* /2: d9:fst f32, db:fist m32, dd:fst f64, df:fist m16 */
+	4, 4, 8, 2, /* /3: d9:fstp f32, db:fistp m32, dd:fstp f64, df:fistp m16 */
 	28, 0, 108, 10, 2, 10, 0, 8,
 	28, 0, 108, 10, 2, 10, 2, 8
 };
@@ -65,6 +70,7 @@
 	FPU_REG *st0_ptr;
 	u_char st0_tag = TAG_Empty;	/* This is just to stop a gcc warning. */
 	u_char loaded_tag;
+	int sv_cw;
 
 	st0_ptr = NULL;		/* Initialized just to stop compiler warnings. */
 
@@ -111,7 +117,8 @@
 	}
 
 	switch (type) {
-	case 000:		/* fld m32real */
+	/* type is a 5-bit value: (3-bit FPU_modrm.reg field | opcode[2,1]) */
+	case 000:		/* fld m32real (d9 /0) */
 		clear_C1();
 		loaded_tag =
 		    FPU_load_single((float __user *)data_address, &loaded_data);
@@ -123,13 +130,13 @@
 		}
 		FPU_copy_to_reg0(&loaded_data, loaded_tag);
 		break;
-	case 001:		/* fild m32int */
+	case 001:		/* fild m32int (db /0) */
 		clear_C1();
 		loaded_tag =
 		    FPU_load_int32((long __user *)data_address, &loaded_data);
 		FPU_copy_to_reg0(&loaded_data, loaded_tag);
 		break;
-	case 002:		/* fld m64real */
+	case 002:		/* fld m64real (dd /0) */
 		clear_C1();
 		loaded_tag =
 		    FPU_load_double((double __user *)data_address,
@@ -142,12 +149,44 @@
 		}
 		FPU_copy_to_reg0(&loaded_data, loaded_tag);
 		break;
-	case 003:		/* fild m16int */
+	case 003:		/* fild m16int (df /0) */
 		clear_C1();
 		loaded_tag =
 		    FPU_load_int16((short __user *)data_address, &loaded_data);
 		FPU_copy_to_reg0(&loaded_data, loaded_tag);
 		break;
+	/* case 004: undefined (d9 /1) */
+	/* On real hardware, fisttp is available if CPUID(1).ECX bit 0 ("sse3") is set */
+	case 005:		/* fisttp m32int (db /1) */
+		clear_C1();
+		sv_cw = control_word;
+		control_word |= RC_CHOP;
+		if (FPU_store_int32
+		    (st0_ptr, st0_tag, (long __user *)data_address))
+			pop_0();	/* pop only if the number was actually stored
+					   (see the 80486 manual p16-28) */
+		control_word = sv_cw;
+		break;
+	case 006:		/* fisttp m64int (dd /1) */
+		clear_C1();
+		sv_cw = control_word;
+		control_word |= RC_CHOP;
+		if (FPU_store_int64
+		    (st0_ptr, st0_tag, (long long __user *)data_address))
+			pop_0();	/* pop only if the number was actually stored
+					   (see the 80486 manual p16-28) */
+		control_word = sv_cw;
+		break;
+	case 007:		/* fisttp m16int (df /1) */
+		clear_C1();
+		sv_cw = control_word;
+		control_word |= RC_CHOP;
+		if (FPU_store_int16
+		    (st0_ptr, st0_tag, (short __user *)data_address))
+			pop_0();	/* pop only if the number was actually stored
+					   (see the 80486 manual p16-28) */
+		control_word = sv_cw;
+		break;
 	case 010:		/* fst m32real */
 		clear_C1();
 		FPU_store_single(st0_ptr, st0_tag,