ARCv2: STAR 9000837815 workaround hardware exclusive transactions livelock

A quad core SMP build could get into hardware livelock with concurrent
LLOCK/SCOND. Workaround that by adding a PREFETCHW which is serialized by
SCU (System Coherency Unit). It brings the cache line in Exclusive state
and makes others invalidate their lines. This gives enough time for
winner to complete the LLOCK/SCOND, before others can get the line back.

The prefetchw in the ll/sc loop is not nice but this is the only
software workaround for current version of RTL.

Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 20b7dc1..03484cb 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -23,13 +23,21 @@
 
 #define atomic_set(v, i) (((v)->counter) = (i))
 
+#ifdef CONFIG_ISA_ARCV2
+#define PREFETCHW	"	prefetchw   [%1]	\n"
+#else
+#define PREFETCHW
+#endif
+
 #define ATOMIC_OP(op, c_op, asm_op)					\
 static inline void atomic_##op(int i, atomic_t *v)			\
 {									\
 	unsigned int temp;						\
 									\
 	__asm__ __volatile__(						\
-	"1:	llock   %0, [%1]	\n"				\
+	"1:				\n"				\
+	PREFETCHW							\
+	"	llock   %0, [%1]	\n"				\
 	"	" #asm_op " %0, %0, %2	\n"				\
 	"	scond   %0, [%1]	\n"				\
 	"	bnz     1b		\n"				\
@@ -50,7 +58,9 @@
 	smp_mb();							\
 									\
 	__asm__ __volatile__(						\
-	"1:	llock   %0, [%1]	\n"				\
+	"1:				\n"				\
+	PREFETCHW							\
+	"	llock   %0, [%1]	\n"				\
 	"	" #asm_op " %0, %0, %2	\n"				\
 	"	scond   %0, [%1]	\n"				\
 	"	bnz     1b		\n"				\