ARC: [SMP] Fix build failures for large NR_CPUS

ST.as only takes S9 (255) for offset. This was going out of range when
accessing a task_struct field with 4k NR_CPUS (due to 128b of cpumask
itself in there).

Work around this by using an intermediate register to do the address
scaling.

There is some duplication of the fix between ctx_sw.c and ctx_sw_asm.S;
however, given that the C version will go away soon, I'm not bothering to
factor out the common code.

Reported-by: Noam Camus <noamc@ezchip.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
diff --git a/arch/arc/kernel/ctx_sw.c b/arch/arc/kernel/ctx_sw.c
index 34410eb..c14a5be 100644
--- a/arch/arc/kernel/ctx_sw.c
+++ b/arch/arc/kernel/ctx_sw.c
@@ -17,6 +17,8 @@
 #include <asm/asm-offsets.h>
 #include <linux/sched.h>
 
+#define KSP_WORD_OFF 	((TASK_THREAD + THREAD_KSP) / 4)
+
 struct task_struct *__sched
 __switch_to(struct task_struct *prev_task, struct task_struct *next_task)
 {
@@ -45,7 +47,16 @@
 #endif
 
 		/* set ksp of outgoing task in tsk->thread.ksp */
+#if KSP_WORD_OFF <= 255
 		"st.as   sp, [%3, %1]    \n\t"
+#else
+		/*
+		 * Workaround for NR_CPUS=4k
+		 * %1 is bigger than 255 (S9 offset for st.as)
+		 */
+		"add2    r24, %3, %1     \n\t"
+		"st      sp, [r24]       \n\t"
+#endif
 
 		"sync   \n\t"
 
@@ -97,7 +108,7 @@
 		/* FP/BLINK restore generated by gcc (standard func epilogue */
 
 		: "=r"(tmp)
-		: "n"((TASK_THREAD + THREAD_KSP) / 4), "r"(next), "r"(prev)
+		: "n"(KSP_WORD_OFF), "r"(next), "r"(prev)
 		: "blink"
 	);
 
diff --git a/arch/arc/kernel/ctx_sw_asm.S b/arch/arc/kernel/ctx_sw_asm.S
index d897234..65690e7 100644
--- a/arch/arc/kernel/ctx_sw_asm.S
+++ b/arch/arc/kernel/ctx_sw_asm.S
@@ -14,6 +14,8 @@
 #include <asm/asm-offsets.h>
 #include <asm/linkage.h>
 
+#define KSP_WORD_OFF 	((TASK_THREAD + THREAD_KSP) / 4)
+
 ;################### Low Level Context Switch ##########################
 
 	.section .sched.text,"ax",@progbits
@@ -28,8 +30,13 @@
 	SAVE_CALLEE_SAVED_KERNEL
 
 	/* Save the now KSP in task->thread.ksp */
-	st.as  sp, [r0, (TASK_THREAD + THREAD_KSP)/4]
-
+#if KSP_WORD_OFF  <= 255
+	st.as  sp, [r0, KSP_WORD_OFF]
+#else
+	/* Workaround for NR_CPUS=4k as ST.as can only take s9 offset */
+	add2	r24, r0, KSP_WORD_OFF
+	st	sp, [r24]
+#endif
 	/*
 	* Return last task in r0 (return reg)
 	* On ARC, Return reg = First Arg reg = r0.