[SPARC64]: More fully work around Spitfire Errata 51.

It appears that a memory barrier soon after a mispredicted
branch, not just in the delay slot, can cause the hang
condition of this cpu errata.

So move them out-of-line, and explicitly put them into
a "branch always, predict taken" delay slot which should
fully kill this problem.

Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/arch/sparc64/kernel/signal32.c b/arch/sparc64/kernel/signal32.c
index b1ed230..aecccd0 100644
--- a/arch/sparc64/kernel/signal32.c
+++ b/arch/sparc64/kernel/signal32.c
@@ -877,11 +877,12 @@
 			unsigned long page = (unsigned long)
 				page_address(pte_page(*ptep));
 
-			__asm__ __volatile__(
-			"	membar	#StoreStore\n"
-			"	flush	%0 + %1"
-			: : "r" (page), "r" (address & (PAGE_SIZE - 1))
-			: "memory");
+			wmb();
+			__asm__ __volatile__("flush	%0 + %1"
+					     : /* no outputs */
+					     : "r" (page),
+					       "r" (address & (PAGE_SIZE - 1))
+					     : "memory");
 		}
 		pte_unmap(ptep);
 		preempt_enable();
@@ -1292,11 +1293,12 @@
 			unsigned long page = (unsigned long)
 				page_address(pte_page(*ptep));
 
-			__asm__ __volatile__(
-			"	membar	#StoreStore\n"
-			"	flush	%0 + %1"
-			: : "r" (page), "r" (address & (PAGE_SIZE - 1))
-			: "memory");
+			wmb();
+			__asm__ __volatile__("flush	%0 + %1"
+					     : /* no outputs */
+					     : "r" (page),
+					       "r" (address & (PAGE_SIZE - 1))
+					     : "memory");
 		}
 		pte_unmap(ptep);
 		preempt_enable();