[MIPS] Fix possible hang in LL/SC futex loops.

The LL / SC loops in __futex_atomic_op() have the usual fixups necessary
for memory acccesses to userspace from kernel space installed:

        __asm__ __volatile__(
        "       .set    push                            \n"
        "       .set    noat                            \n"
        "       .set    mips3                           \n"
        "1:     ll      %1, %4  # __futex_atomic_op     \n"
        "       .set    mips0                           \n"
        "       " insn  "                               \n"
        "       .set    mips3                           \n"
        "2:     sc      $1, %2                          \n"
        "       beqz    $1, 1b                          \n"
        __WEAK_LLSC_MB
        "3:                                             \n"
        "       .set    pop                             \n"
        "       .set    mips0                           \n"
        "       .section .fixup,\"ax\"                  \n"
        "4:     li      %0, %6                          \n"
        "       j       2b                              \n"	<-----
        "       .previous                               \n"
        "       .section __ex_table,\"a\"               \n"
        "       "__UA_ADDR "\t1b, 4b                    \n"
        "       "__UA_ADDR "\t2b, 4b                    \n"
        "       .previous                               \n"
        : "=r" (ret), "=&r" (oldval), "=R" (*uaddr)
        : "0" (0), "R" (*uaddr), "Jr" (oparg), "i" (-EFAULT)
        : "memory");

The branch at the end of the fixup code, it goes back to the SC
instruction, no matter if the fault was first taken by the LL or SC
instruction resulting in an endless loop which will only terminate if
the address become valid again due to another thread setting up an
accessible mapping and the CPU happens to execute the SC instruction
successfully which due to the preceeding ERET instruction of the fault
handler would only happen if UNPREDICTABLE instruction behaviour of the
SC instruction without a preceeding LL happens to favor that outcome.
But normally processes are nice, pass valid arguments and we were just
getting away with this.

Thanks to Kaz Kylheku <kaz@zeugmasystems.com> for providing the original
report and a test case.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
diff --git a/include/asm-mips/futex.h b/include/asm-mips/futex.h
index 3e7e30d..17f082c 100644
--- a/include/asm-mips/futex.h
+++ b/include/asm-mips/futex.h
@@ -35,7 +35,7 @@
 		"	.set	mips0				\n"	\
 		"	.section .fixup,\"ax\"			\n"	\
 		"4:	li	%0, %6				\n"	\
-		"	j	2b				\n"	\
+		"	j	3b				\n"	\
 		"	.previous				\n"	\
 		"	.section __ex_table,\"a\"		\n"	\
 		"	"__UA_ADDR "\t1b, 4b			\n"	\
@@ -61,7 +61,7 @@
 		"	.set	mips0				\n"	\
 		"	.section .fixup,\"ax\"			\n"	\
 		"4:	li	%0, %6				\n"	\
-		"	j	2b				\n"	\
+		"	j	3b				\n"	\
 		"	.previous				\n"	\
 		"	.section __ex_table,\"a\"		\n"	\
 		"	"__UA_ADDR "\t1b, 4b			\n"	\
@@ -200,4 +200,4 @@
 }
 
 #endif
-#endif
+#endif /* _ASM_FUTEX_H */