Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto updates from Herbert Xu:
 "Here is the crypto update for 3.15:
   - Added 3DES driver for OMAP4/AM43xx
   - Added AVX2 acceleration for SHA
   - Added hash-only AEAD algorithms in caam
   - Removed tegra driver as it is not functioning and the hardware is
     too slow
   - Allowed blkcipher walks over AEAD data (needed for ARM)
   - Fixed unprotected FPU/SSE access in ghash-clmulni-intel
   - Fixed highmem crash in omap-sham
   - Added (zero entropy) randomness when initialising hardware RNGs
   - Fixed unaligned ahash completion functions
   - Added soft module dependency for crc32c for initrds that use crc32c"

* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (60 commits)
  crypto: ghash-clmulni-intel - use C implementation for setkey()
  crypto: x86/sha1 - reduce size of the AVX2 asm implementation
  crypto: x86/sha1 - fix stack alignment of AVX2 variant
  crypto: x86/sha1 - re-enable the AVX variant
  crypto: sha - SHA1 transform x86_64 AVX2
  crypto: crypto_wq - Fix late crypto work queue initialization
  crypto: caam - add missing key_dma unmap
  crypto: caam - add support for aead null encryption
  crypto: testmgr - add aead null encryption test vectors
  crypto: export NULL algorithms defines
  crypto: caam - remove error propagation handling
  crypto: hash - Simplify the ahash_finup implementation
  crypto: hash - Pull out the functions to save/restore request
  crypto: hash - Fix the pointer voodoo in unaligned ahash
  crypto: caam - Fix first parameter to caam_init_rng
  crypto: omap-sham - Map SG pages if they are HIGHMEM before accessing
  crypto: caam - Dynamic memory allocation for caam_rng_ctx object
  crypto: allow blkcipher walks over AEAD data
  crypto: remove direct blkcipher_walk dependency on transform
  hwrng: add randomness to system from rng sources
  ...
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 6ba54d6..61d6e28 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -79,6 +79,9 @@
 aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o
 ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
 sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
+ifeq ($(avx2_supported),yes)
+sha1-ssse3-y += sha1_avx2_x86_64_asm.o
+endif
 crc32c-intel-y := crc32c-intel_glue.o
 crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o
 crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o
diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c
index 50ec333..8af519e 100644
--- a/arch/x86/crypto/blowfish_glue.c
+++ b/arch/x86/crypto/blowfish_glue.c
@@ -223,9 +223,6 @@
 			src -= 1;
 			dst -= 1;
 		} while (nbytes >= bsize * 4);
-
-		if (nbytes < bsize)
-			goto done;
 	}
 
 	/* Handle leftovers */
diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c
index e6a3700..e57e20a 100644
--- a/arch/x86/crypto/cast5_avx_glue.c
+++ b/arch/x86/crypto/cast5_avx_glue.c
@@ -203,9 +203,6 @@
 			src -= 1;
 			dst -= 1;
 		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
-
-		if (nbytes < bsize)
-			goto done;
 	}
 
 	/* Handle leftovers */
diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S
index 586f41a..185fad4 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_asm.S
+++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S
@@ -24,10 +24,6 @@
 .align 16
 .Lbswap_mask:
 	.octa 0x000102030405060708090a0b0c0d0e0f
-.Lpoly:
-	.octa 0xc2000000000000000000000000000001
-.Ltwo_one:
-	.octa 0x00000001000000000000000000000001
 
 #define DATA	%xmm0
 #define SHASH	%xmm1
@@ -134,28 +130,3 @@
 .Lupdate_just_ret:
 	ret
 ENDPROC(clmul_ghash_update)
-
-/*
- * void clmul_ghash_setkey(be128 *shash, const u8 *key);
- *
- * Calculate hash_key << 1 mod poly
- */
-ENTRY(clmul_ghash_setkey)
-	movaps .Lbswap_mask, BSWAP
-	movups (%rsi), %xmm0
-	PSHUFB_XMM BSWAP %xmm0
-	movaps %xmm0, %xmm1
-	psllq $1, %xmm0
-	psrlq $63, %xmm1
-	movaps %xmm1, %xmm2
-	pslldq $8, %xmm1
-	psrldq $8, %xmm2
-	por %xmm1, %xmm0
-	# reduction
-	pshufd $0b00100100, %xmm2, %xmm1
-	pcmpeqd .Ltwo_one, %xmm1
-	pand .Lpoly, %xmm1
-	pxor %xmm1, %xmm0
-	movups %xmm0, (%rdi)
-	ret
-ENDPROC(clmul_ghash_setkey)
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index 6759dd1..d785cf2 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -30,8 +30,6 @@
 void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
 			const be128 *shash);
 
-void clmul_ghash_setkey(be128 *shash, const u8 *key);
-
 struct ghash_async_ctx {
 	struct cryptd_ahash *cryptd_tfm;
 };
@@ -58,13 +56,23 @@
 			const u8 *key, unsigned int keylen)
 {
 	struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
+	be128 *x = (be128 *)key;
+	u64 a, b;
 
 	if (keylen != GHASH_BLOCK_SIZE) {
 		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
 		return -EINVAL;
 	}
 
-	clmul_ghash_setkey(&ctx->shash, key);
+	/* perform multiplication by 'x' in GF(2^128) */
+	a = be64_to_cpu(x->a);
+	b = be64_to_cpu(x->b);
+
+	ctx->shash.a = (__be64)((b << 1) | (a >> 63));
+	ctx->shash.b = (__be64)((a << 1) | (b >> 63));
+
+	if (a >> 63)
+		ctx->shash.b ^= cpu_to_be64(0xc2);
 
 	return 0;
 }
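The setkey conversion above is the changelog's "unprotected FPU/SSE access" fix: the old clmul_ghash_setkey() asm executed SSE instructions on the setkey path without FPU state protection, so the shift is now done in plain C. A minimal standalone sketch of the same multiply-by-x step (endianness load/store of the be128 halves elided; variable names mirror the hunk above):

	#include <stdint.h>

	/* Doubling H in GF(2^128): the half-swapping shift and the 0xc2
	 * reduction follow GHASH's bit-reflected element representation;
	 * 0xc2 mirrors the cpu_to_be64(0xc2) constant in the kernel code. */
	static void ghash_mul_by_x(uint64_t *ha, uint64_t *hb)
	{
		uint64_t a = *ha, b = *hb;

		*ha = (b << 1) | (a >> 63);
		*hb = (a << 1) | (b >> 63);
		if (a >> 63)
			*hb ^= 0xc2;	/* conditional polynomial reduction */
	}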
diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
new file mode 100644
index 0000000..1cd792d
--- /dev/null
+++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
@@ -0,0 +1,708 @@
+/*
+ *	Implement fast SHA-1 with AVX2 instructions. (x86_64)
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * Ilya Albrekht <ilya.albrekht@intel.com>
+ * Maxim Locktyukhin <maxim.locktyukhin@intel.com>
+ * Ronen Zohar <ronen.zohar@intel.com>
+ * Chandramouli Narayanan <mouli@linux.intel.com>
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * SHA-1 implementation with Intel(R) AVX2 instruction set extensions.
+ *
+ * This implementation is based on the previous SSSE3 release:
+ * Visit http://software.intel.com/en-us/articles/
+ * and refer to improving-the-performance-of-the-secure-hash-algorithm-1/
+ *
+ * Updates the 20-byte SHA-1 record in 'hash' for an even number
+ * ('num_blocks') of consecutive 64-byte blocks.
+ *
+ * extern "C" void sha1_transform_avx2(
+ *	int *hash, const char* input, size_t num_blocks );
+ */
+
+#include <linux/linkage.h>
+
+#define	CTX	%rdi	/* arg1 */
+#define BUF	%rsi	/* arg2 */
+#define CNT	%rdx	/* arg3 */
+
+#define	REG_A	%ecx
+#define	REG_B	%esi
+#define	REG_C	%edi
+#define	REG_D	%eax
+#define	REG_E	%edx
+#define	REG_TB	%ebx
+#define	REG_TA	%r12d
+#define	REG_RA	%rcx
+#define	REG_RB	%rsi
+#define	REG_RC	%rdi
+#define	REG_RD	%rax
+#define	REG_RE	%rdx
+#define	REG_RTA	%r12
+#define	REG_RTB	%rbx
+#define	REG_T1	%ebp
+#define	xmm_mov	vmovups
+#define	avx2_zeroupper	vzeroupper
+#define	RND_F1	1
+#define	RND_F2	2
+#define	RND_F3	3
+
+.macro REGALLOC
+	.set A, REG_A
+	.set B, REG_B
+	.set C, REG_C
+	.set D, REG_D
+	.set E, REG_E
+	.set TB, REG_TB
+	.set TA, REG_TA
+
+	.set RA, REG_RA
+	.set RB, REG_RB
+	.set RC, REG_RC
+	.set RD, REG_RD
+	.set RE, REG_RE
+
+	.set RTA, REG_RTA
+	.set RTB, REG_RTB
+
+	.set T1, REG_T1
+.endm
+
+#define K_BASE		%r8
+#define HASH_PTR	%r9
+#define BUFFER_PTR	%r10
+#define BUFFER_PTR2	%r13
+#define BUFFER_END	%r11
+
+#define PRECALC_BUF	%r14
+#define WK_BUF		%r15
+
+#define W_TMP		%xmm0
+#define WY_TMP		%ymm0
+#define WY_TMP2		%ymm9
+
+# AVX2 variables
+#define WY0		%ymm3
+#define WY4		%ymm5
+#define WY08		%ymm7
+#define WY12		%ymm8
+#define WY16		%ymm12
+#define WY20		%ymm13
+#define WY24		%ymm14
+#define WY28		%ymm15
+
+#define YMM_SHUFB_BSWAP	%ymm10
+
+/*
+ * Keep 2 iterations precalculated at a time:
+ *    - 80 DWORDs per iteration * 2
+ */
+#define W_SIZE		(80*2*2 +16)
+
+#define WK(t)	((((t) % 80) / 4)*32 + ( (t) % 4)*4 + ((t)/80)*16 )(WK_BUF)
+#define PRECALC_WK(t)	((t)*2*2)(PRECALC_BUF)
+
+
+.macro UPDATE_HASH  hash, val
+	add	\hash, \val
+	mov	\val, \hash
+.endm
+
+.macro PRECALC_RESET_WY
+	.set WY_00, WY0
+	.set WY_04, WY4
+	.set WY_08, WY08
+	.set WY_12, WY12
+	.set WY_16, WY16
+	.set WY_20, WY20
+	.set WY_24, WY24
+	.set WY_28, WY28
+	.set WY_32, WY_00
+.endm
+
+.macro PRECALC_ROTATE_WY
+	/* Rotate macros */
+	.set WY_32, WY_28
+	.set WY_28, WY_24
+	.set WY_24, WY_20
+	.set WY_20, WY_16
+	.set WY_16, WY_12
+	.set WY_12, WY_08
+	.set WY_08, WY_04
+	.set WY_04, WY_00
+	.set WY_00, WY_32
+
+	/* Define register aliases */
+	.set WY, WY_00
+	.set WY_minus_04, WY_04
+	.set WY_minus_08, WY_08
+	.set WY_minus_12, WY_12
+	.set WY_minus_16, WY_16
+	.set WY_minus_20, WY_20
+	.set WY_minus_24, WY_24
+	.set WY_minus_28, WY_28
+	.set WY_minus_32, WY
+.endm
+
+.macro PRECALC_00_15
+	.if (i == 0) # Initialize and rotate registers
+		PRECALC_RESET_WY
+		PRECALC_ROTATE_WY
+	.endif
+
+	/* message scheduling pre-compute for rounds 0-15 */
+	.if   ((i & 7) == 0)
+		/*
+		 * blended AVX2 and ALU instruction scheduling
+		 * 1 vector iteration per 8 rounds
+		 */
+		vmovdqu ((i * 2) + PRECALC_OFFSET)(BUFFER_PTR), W_TMP
+	.elseif ((i & 7) == 1)
+		vinsertf128 $1, (((i-1) * 2)+PRECALC_OFFSET)(BUFFER_PTR2),\
+			 WY_TMP, WY_TMP
+	.elseif ((i & 7) == 2)
+		vpshufb YMM_SHUFB_BSWAP, WY_TMP, WY
+	.elseif ((i & 7) == 4)
+		vpaddd  K_XMM(K_BASE), WY, WY_TMP
+	.elseif ((i & 7) == 7)
+		vmovdqu  WY_TMP, PRECALC_WK(i&~7)
+
+		PRECALC_ROTATE_WY
+	.endif
+.endm
+
+.macro PRECALC_16_31
+	/*
+	 * message scheduling pre-compute for rounds 16-31
+	 * calculating last 32 w[i] values in 8 XMM registers
+	 * pre-calculate K+w[i] values and store to mem
+	 * for later load by ALU add instruction
+	 *
+	 * "brute force" vectorization for rounds 16-31 only
+	 * due to w[i]->w[i-3] dependency
+	 */
+	.if   ((i & 7) == 0)
+		/*
+		 * blended AVX2 and ALU instruction scheduling
+		 * 1 vector iteration per 8 rounds
+		 */
+		/* w[i-14] */
+		vpalignr	$8, WY_minus_16, WY_minus_12, WY
+		vpsrldq	$4, WY_minus_04, WY_TMP               /* w[i-3] */
+	.elseif ((i & 7) == 1)
+		vpxor	WY_minus_08, WY, WY
+		vpxor	WY_minus_16, WY_TMP, WY_TMP
+	.elseif ((i & 7) == 2)
+		vpxor	WY_TMP, WY, WY
+		vpslldq	$12, WY, WY_TMP2
+	.elseif ((i & 7) == 3)
+		vpslld	$1, WY, WY_TMP
+		vpsrld	$31, WY, WY
+	.elseif ((i & 7) == 4)
+		vpor	WY, WY_TMP, WY_TMP
+		vpslld	$2, WY_TMP2, WY
+	.elseif ((i & 7) == 5)
+		vpsrld	$30, WY_TMP2, WY_TMP2
+		vpxor	WY, WY_TMP, WY_TMP
+	.elseif ((i & 7) == 7)
+		vpxor	WY_TMP2, WY_TMP, WY
+		vpaddd	K_XMM(K_BASE), WY, WY_TMP
+		vmovdqu	WY_TMP, PRECALC_WK(i&~7)
+
+		PRECALC_ROTATE_WY
+	.endif
+.endm
+
+.macro PRECALC_32_79
+	/*
+	 * in the SHA-1 specification:
+	 * w[i] = (w[i-3] ^ w[i-8]  ^ w[i-14] ^ w[i-16]) rol 1
+	 * we instead compute the equivalent:
+	 * w[i] = (w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32]) rol 2
+	 * which allows more efficient vectorization,
+	 * since the w[i]=>w[i-3] dependency is broken
+	 */
+
+	.if   ((i & 7) == 0)
+	/*
+	 * blended AVX2 and ALU instruction scheduling
+	 * 1 vector iteration per 8 rounds
+	 */
+		vpalignr	$8, WY_minus_08, WY_minus_04, WY_TMP
+	.elseif ((i & 7) == 1)
+		/* W is W_minus_32 before xor */
+		vpxor	WY_minus_28, WY, WY
+	.elseif ((i & 7) == 2)
+		vpxor	WY_minus_16, WY_TMP, WY_TMP
+	.elseif ((i & 7) == 3)
+		vpxor	WY_TMP, WY, WY
+	.elseif ((i & 7) == 4)
+		vpslld	$2, WY, WY_TMP
+	.elseif ((i & 7) == 5)
+		vpsrld	$30, WY, WY
+		vpor	WY, WY_TMP, WY
+	.elseif ((i & 7) == 7)
+		vpaddd	K_XMM(K_BASE), WY, WY_TMP
+		vmovdqu	WY_TMP, PRECALC_WK(i&~7)
+
+		PRECALC_ROTATE_WY
+	.endif
+.endm
+
+.macro PRECALC r, s
+	.set i, \r
+
+	.if (i < 40)
+		.set K_XMM, 32*0
+	.elseif (i < 80)
+		.set K_XMM, 32*1
+	.elseif (i < 120)
+		.set K_XMM, 32*2
+	.else
+		.set K_XMM, 32*3
+	.endif
+
+	.if (i<32)
+		PRECALC_00_15	\s
+	.elseif (i<64)
+		PRECALC_16_31	\s
+	.elseif (i < 160)
+		PRECALC_32_79	\s
+	.endif
+.endm
+
+.macro ROTATE_STATE
+	.set T_REG, E
+	.set E, D
+	.set D, C
+	.set C, B
+	.set B, TB
+	.set TB, A
+	.set A, T_REG
+
+	.set T_REG, RE
+	.set RE, RD
+	.set RD, RC
+	.set RC, RB
+	.set RB, RTB
+	.set RTB, RA
+	.set RA, T_REG
+.endm
+
+/* Macro relies on saved ROUND_Fx */
+
+.macro RND_FUN f, r
+	.if (\f == RND_F1)
+		ROUND_F1	\r
+	.elseif (\f == RND_F2)
+		ROUND_F2	\r
+	.elseif (\f == RND_F3)
+		ROUND_F3	\r
+	.endif
+.endm
+
+.macro RR r
+	.set round_id, (\r % 80)
+
+	.if (round_id == 0)        /* Precalculate F for first round */
+		.set ROUND_FUNC, RND_F1
+		mov	B, TB
+
+		rorx	$(32-30), B, B    /* b>>>2 */
+		andn	D, TB, T1
+		and	C, TB
+		xor	T1, TB
+	.endif
+
+	RND_FUN ROUND_FUNC, \r
+	ROTATE_STATE
+
+	.if   (round_id == 18)
+		.set ROUND_FUNC, RND_F2
+	.elseif (round_id == 38)
+		.set ROUND_FUNC, RND_F3
+	.elseif (round_id == 58)
+		.set ROUND_FUNC, RND_F2
+	.endif
+
+	.set round_id, ( (\r+1) % 80)
+
+	RND_FUN ROUND_FUNC, (\r+1)
+	ROTATE_STATE
+.endm
+
+.macro ROUND_F1 r
+	add	WK(\r), E
+
+	andn	C, A, T1			/* ~b&d */
+	lea	(RE,RTB), E		/* Add F from the previous round */
+
+	rorx	$(32-5), A, TA		/* T2 = A >>> 5 */
+	rorx	$(32-30),A, TB		/* b>>>2 for next round */
+
+	PRECALC	(\r)			/* msg scheduling for next 2 blocks */
+
+	/*
+	 * Calculate F for the next round
+	 * (b & c) ^ andn[b, d]
+	 */
+	and	B, A			/* b&c */
+	xor	T1, A			/* F1 = (b&c) ^ (~b&d) */
+
+	lea	(RE,RTA), E		/* E += A >>> 5 */
+.endm
+
+.macro ROUND_F2 r
+	add	WK(\r), E
+	lea	(RE,RTB), E		/* Add F from the previous round */
+
+	/* Calculate F for the next round */
+	rorx	$(32-5), A, TA		/* T2 = A >>> 5 */
+	.if ((round_id) < 79)
+		rorx	$(32-30), A, TB	/* b>>>2 for next round */
+	.endif
+	PRECALC	(\r)			/* msg scheduling for next 2 blocks */
+
+	.if ((round_id) < 79)
+		xor	B, A
+	.endif
+
+	add	TA, E			/* E += A >>> 5 */
+
+	.if ((round_id) < 79)
+		xor	C, A
+	.endif
+.endm
+
+.macro ROUND_F3 r
+	add	WK(\r), E
+	PRECALC	(\r)			/* msg scheduling for next 2 blocks */
+
+	lea	(RE,RTB), E		/* Add F from the previous round */
+
+	mov	B, T1
+	or	A, T1
+
+	rorx	$(32-5), A, TA		/* T2 = A >>> 5 */
+	rorx	$(32-30), A, TB		/* b>>>2 for next round */
+
+	/* Calculate F for the next round
+	 * (b and c) or (d and (b or c))
+	 */
+	and	C, T1
+	and	B, A
+	or	T1, A
+
+	add	TA, E			/* E += A >>> 5 */
+
+.endm
+
+/*
+ * macro implements 80 rounds of SHA-1, for multiple blocks with s/w pipelining
+ */
+.macro SHA1_PIPELINED_MAIN_BODY
+
+	REGALLOC
+
+	mov	(HASH_PTR), A
+	mov	4(HASH_PTR), B
+	mov	8(HASH_PTR), C
+	mov	12(HASH_PTR), D
+	mov	16(HASH_PTR), E
+
+	mov	%rsp, PRECALC_BUF
+	lea	(2*4*80+32)(%rsp), WK_BUF
+
+	# Precalc WK for first 2 blocks
+	PRECALC_OFFSET = 0
+	.set i, 0
+	.rept    160
+		PRECALC i
+		.set i, i + 1
+	.endr
+	PRECALC_OFFSET = 128
+	xchg	WK_BUF, PRECALC_BUF
+
+	.align 32
+_loop:
+	/*
+	 * the code loops through more than one block;
+	 * we use the K_BASE value as the signal of the last block,
+	 * set below by: cmovae K_BASE, BUFFER_PTR
+	 */
+	cmp	K_BASE, BUFFER_PTR
+	jne	_begin
+	.align 32
+	jmp	_end
+	.align 32
+_begin:
+
+	/*
+	 * Do first block
+	 * rounds: 0,2,4,6,8
+	 */
+	.set j, 0
+	.rept 5
+		RR	j
+		.set j, j+2
+	.endr
+
+	jmp _loop0
+_loop0:
+
+	/*
+	 * rounds:
+	 * 10,12,14,16,18
+	 * 20,22,24,26,28
+	 * 30,32,34,36,38
+	 * 40,42,44,46,48
+	 * 50,52,54,56,58
+	 */
+	.rept 25
+		RR	j
+		.set j, j+2
+	.endr
+
+	add	$(2*64), BUFFER_PTR       /* move to next odd-64-byte block */
+	cmp	BUFFER_END, BUFFER_PTR    /* is current block the last one? */
+	cmovae	K_BASE, BUFFER_PTR	/* signal the last iteration smartly */
+
+	/*
+	 * rounds
+	 * 60,62,64,66,68
+	 * 70,72,74,76,78
+	 */
+	.rept 10
+		RR	j
+		.set j, j+2
+	.endr
+
+	UPDATE_HASH	(HASH_PTR), A
+	UPDATE_HASH	4(HASH_PTR), TB
+	UPDATE_HASH	8(HASH_PTR), C
+	UPDATE_HASH	12(HASH_PTR), D
+	UPDATE_HASH	16(HASH_PTR), E
+
+	cmp	K_BASE, BUFFER_PTR	/* is current block the last one? */
+	je	_loop
+
+	mov	TB, B
+
+	/* Process second block */
+	/*
+	 * rounds
+	 *  0+80, 2+80, 4+80, 6+80, 8+80
+	 * 10+80,12+80,14+80,16+80,18+80
+	 */
+
+	.set j, 0
+	.rept 10
+		RR	j+80
+		.set j, j+2
+	.endr
+
+	jmp	_loop1
+_loop1:
+	/*
+	 * rounds
+	 * 20+80,22+80,24+80,26+80,28+80
+	 * 30+80,32+80,34+80,36+80,38+80
+	 */
+	.rept 10
+		RR	j+80
+		.set j, j+2
+	.endr
+
+	jmp	_loop2
+_loop2:
+
+	/*
+	 * rounds
+	 * 40+80,42+80,44+80,46+80,48+80
+	 * 50+80,52+80,54+80,56+80,58+80
+	 */
+	.rept 10
+		RR	j+80
+		.set j, j+2
+	.endr
+
+	add	$(2*64), BUFFER_PTR2      /* move to next even-64-byte block */
+
+	cmp	BUFFER_END, BUFFER_PTR2   /* is current block the last one? */
+	cmovae	K_BASE, BUFFER_PTR       /* signal the last iteration smartly */
+
+	jmp	_loop3
+_loop3:
+
+	/*
+	 * rounds
+	 * 60+80,62+80,64+80,66+80,68+80
+	 * 70+80,72+80,74+80,76+80,78+80
+	 */
+	.rept 10
+		RR	j+80
+		.set j, j+2
+	.endr
+
+	UPDATE_HASH	(HASH_PTR), A
+	UPDATE_HASH	4(HASH_PTR), TB
+	UPDATE_HASH	8(HASH_PTR), C
+	UPDATE_HASH	12(HASH_PTR), D
+	UPDATE_HASH	16(HASH_PTR), E
+
+	/* Reset state for AVX2 reg permutation */
+	mov	A, TA
+	mov	TB, A
+	mov	C, TB
+	mov	E, C
+	mov	D, B
+	mov	TA, D
+
+	REGALLOC
+
+	xchg	WK_BUF, PRECALC_BUF
+
+	jmp	_loop
+
+	.align 32
+	_end:
+
+.endm
+/*
+ * macro implements SHA-1 function's body for several 64-byte blocks
+ * param: function's name
+ */
+.macro SHA1_VECTOR_ASM  name
+	ENTRY(\name)
+
+	push	%rbx
+	push	%rbp
+	push	%r12
+	push	%r13
+	push	%r14
+	push	%r15
+
+	RESERVE_STACK  = (W_SIZE*4 + 8+24)
+
+	/* Align stack */
+	mov	%rsp, %rbx
+	and	$~(0x20-1), %rsp
+	push	%rbx
+	sub	$RESERVE_STACK, %rsp
+
+	avx2_zeroupper
+
+	lea	K_XMM_AR(%rip), K_BASE
+
+	mov	CTX, HASH_PTR
+	mov	BUF, BUFFER_PTR
+	lea	64(BUF), BUFFER_PTR2
+
+	shl	$6, CNT			/* mul by 64 */
+	add	BUF, CNT
+	add	$64, CNT
+	mov	CNT, BUFFER_END
+
+	cmp	BUFFER_END, BUFFER_PTR2
+	cmovae	K_BASE, BUFFER_PTR2
+
+	xmm_mov	BSWAP_SHUFB_CTL(%rip), YMM_SHUFB_BSWAP
+
+	SHA1_PIPELINED_MAIN_BODY
+
+	avx2_zeroupper
+
+	add	$RESERVE_STACK, %rsp
+	pop	%rsp
+
+	pop	%r15
+	pop	%r14
+	pop	%r13
+	pop	%r12
+	pop	%rbp
+	pop	%rbx
+
+	ret
+
+	ENDPROC(\name)
+.endm
+
+.section .rodata
+
+#define K1 0x5a827999
+#define K2 0x6ed9eba1
+#define K3 0x8f1bbcdc
+#define K4 0xca62c1d6
+
+.align 128
+K_XMM_AR:
+	.long K1, K1, K1, K1
+	.long K1, K1, K1, K1
+	.long K2, K2, K2, K2
+	.long K2, K2, K2, K2
+	.long K3, K3, K3, K3
+	.long K3, K3, K3, K3
+	.long K4, K4, K4, K4
+	.long K4, K4, K4, K4
+
+BSWAP_SHUFB_CTL:
+	.long 0x00010203
+	.long 0x04050607
+	.long 0x08090a0b
+	.long 0x0c0d0e0f
+	.long 0x00010203
+	.long 0x04050607
+	.long 0x08090a0b
+	.long 0x0c0d0e0f
+.text
+
+SHA1_VECTOR_ASM     sha1_transform_avx2
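The schedule rewrite in PRECALC_32_79 above is what enables the two-block pipeline. As a sanity check (an illustrative sketch, not kernel code), expanding the standard recurrence once and cancelling the duplicated terms shows the rol-2 form holds for i >= 32:

	#include <assert.h>
	#include <stdint.h>

	static uint32_t rol32(uint32_t v, int n)
	{
		return (v << n) | (v >> (32 - n));
	}

	/* The rol-2 form removes the w[i]->w[i-3] dependency that blocks
	 * 8-wide vectorization of the message schedule. */
	static void sha1_schedule_equivalence(const uint32_t block[16])
	{
		uint32_t w[80];
		int i;

		for (i = 0; i < 16; i++)
			w[i] = block[i];
		for (i = 16; i < 80; i++)
			w[i] = rol32(w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16], 1);

		for (i = 32; i < 80; i++)
			assert(w[i] ==
			       rol32(w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32], 2));
	}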
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index 4a11a9d..74d16ef 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -10,6 +10,7 @@
  * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
  * Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
  * Copyright (c) Mathias Krause <minipli@googlemail.com>
+ * Copyright (c) Chandramouli Narayanan <mouli@linux.intel.com>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the Free
@@ -39,6 +40,12 @@
 asmlinkage void sha1_transform_avx(u32 *digest, const char *data,
 				   unsigned int rounds);
 #endif
+#ifdef CONFIG_AS_AVX2
+#define SHA1_AVX2_BLOCK_OPTSIZE	4	/* optimal 4*64 bytes of SHA1 blocks */
+
+asmlinkage void sha1_transform_avx2(u32 *digest, const char *data,
+				unsigned int rounds);
+#endif
 
 static asmlinkage void (*sha1_transform_asm)(u32 *, const char *, unsigned int);
 
@@ -165,6 +172,18 @@
 	return 0;
 }
 
+#ifdef CONFIG_AS_AVX2
+static void sha1_apply_transform_avx2(u32 *digest, const char *data,
+				unsigned int rounds)
+{
+	/* Select the optimal transform based on data block size */
+	if (rounds >= SHA1_AVX2_BLOCK_OPTSIZE)
+		sha1_transform_avx2(digest, data, rounds);
+	else
+		sha1_transform_avx(digest, data, rounds);
+}
+#endif
+
 static struct shash_alg alg = {
 	.digestsize	=	SHA1_DIGEST_SIZE,
 	.init		=	sha1_ssse3_init,
@@ -201,27 +220,49 @@
 
 	return true;
 }
+
+#ifdef CONFIG_AS_AVX2
+static bool __init avx2_usable(void)
+{
+	if (avx_usable() && cpu_has_avx2 && boot_cpu_has(X86_FEATURE_BMI1) &&
+	    boot_cpu_has(X86_FEATURE_BMI2))
+		return true;
+
+	return false;
+}
+#endif
 #endif
 
 static int __init sha1_ssse3_mod_init(void)
 {
+	char *algo_name;
+
 	/* test for SSSE3 first */
-	if (cpu_has_ssse3)
+	if (cpu_has_ssse3) {
 		sha1_transform_asm = sha1_transform_ssse3;
+		algo_name = "SSSE3";
+	}
 
 #ifdef CONFIG_AS_AVX
 	/* allow AVX to override SSSE3, it's a little faster */
-	if (avx_usable())
+	if (avx_usable()) {
 		sha1_transform_asm = sha1_transform_avx;
+		algo_name = "AVX";
+#ifdef CONFIG_AS_AVX2
+		/* allow AVX2 to override AVX, it's a little faster */
+		if (avx2_usable()) {
+			sha1_transform_asm = sha1_apply_transform_avx2;
+			algo_name = "AVX2";
+		}
+#endif
+	}
 #endif
 
 	if (sha1_transform_asm) {
-		pr_info("Using %s optimized SHA-1 implementation\n",
-		        sha1_transform_asm == sha1_transform_ssse3 ? "SSSE3"
-		                                                   : "AVX");
+		pr_info("Using %s optimized SHA-1 implementation\n", algo_name);
 		return crypto_register_shash(&alg);
 	}
-	pr_info("Neither AVX nor SSSE3 is available/usable.\n");
+	pr_info("Neither AVX nor AVX2 nor SSSE3 is available/usable.\n");
 
 	return -ENODEV;
 }
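In this glue the third argument counts 64-byte blocks, so sha1_apply_transform_avx2() only takes the AVX2 path for updates of at least SHA1_AVX2_BLOCK_OPTSIZE (4) blocks; the pipelined AVX2 code precomputes a full two-block schedule per pass, which does not pay off for short inputs. (The asm also uses BMI1 andn and BMI2 rorx, hence the extra feature checks in avx2_usable().) Illustrative, hypothetical call sites:

	sha1_apply_transform_avx2(digest, data, 3);  /* 192 bytes -> sha1_transform_avx  */
	sha1_apply_transform_avx2(digest, data, 16); /* 1 KiB     -> sha1_transform_avx2 */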
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 7bcb70d..ce4012a 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -491,14 +491,14 @@
 	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2).
 
 config CRYPTO_SHA1_SSSE3
-	tristate "SHA1 digest algorithm (SSSE3/AVX)"
+	tristate "SHA1 digest algorithm (SSSE3/AVX/AVX2)"
 	depends on X86 && 64BIT
 	select CRYPTO_SHA1
 	select CRYPTO_HASH
 	help
 	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
 	  using Supplemental SSE3 (SSSE3) instructions or Advanced Vector
-	  Extensions (AVX), when available.
+	  Extensions (AVX/AVX2), when available.
 
 config CRYPTO_SHA256_SSSE3
 	tristate "SHA256 digest algorithm (SSSE3/AVX/AVX2)"
diff --git a/crypto/Makefile b/crypto/Makefile
index b29402a..38e64231 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -81,7 +81,7 @@
 obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o
 obj-$(CONFIG_CRYPTO_ZLIB) += zlib.o
 obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o
-obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o
+obj-$(CONFIG_CRYPTO_CRC32C) += crc32c_generic.o
 obj-$(CONFIG_CRYPTO_CRC32) += crc32.o
 obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif_common.o crct10dif_generic.o
 obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o
diff --git a/crypto/ahash.c b/crypto/ahash.c
index a92dc38..6e72233 100644
--- a/crypto/ahash.c
+++ b/crypto/ahash.c
@@ -190,6 +190,75 @@
 	return len + (mask & ~(crypto_tfm_ctx_alignment() - 1));
 }
 
+static int ahash_save_req(struct ahash_request *req, crypto_completion_t cplt)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	unsigned long alignmask = crypto_ahash_alignmask(tfm);
+	unsigned int ds = crypto_ahash_digestsize(tfm);
+	struct ahash_request_priv *priv;
+
+	priv = kmalloc(sizeof(*priv) + ahash_align_buffer_size(ds, alignmask),
+		       (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		       GFP_KERNEL : GFP_ATOMIC);
+	if (!priv)
+		return -ENOMEM;
+
+	/*
+	 * WARNING: Voodoo programming below!
+	 *
+	 * The code below is obscure and hard to understand, thus explanation
+	 * is necessary. See include/crypto/hash.h and include/linux/crypto.h
+	 * to understand the layout of structures used here!
+	 *
+	 * The code here will replace portions of the ORIGINAL request with
+	 * pointers to new code and buffers so the hashing operation can store
+	 * the result in an aligned buffer. We will call the modified request
+	 * an ADJUSTED request.
+	 *
+	 * The newly mangled request will look as follows:
+	 *
+	 * req {
+	 *   .result        = ADJUSTED[new aligned buffer]
+	 *   .base.complete = ADJUSTED[pointer to completion function]
+	 *   .base.data     = ADJUSTED[*req (pointer to self)]
+	 *   .priv          = ADJUSTED[new priv] {
+	 *           .result   = ORIGINAL(result)
+	 *           .complete = ORIGINAL(base.complete)
+	 *           .data     = ORIGINAL(base.data)
+	 *   }
+	 */
+
+	priv->result = req->result;
+	priv->complete = req->base.complete;
+	priv->data = req->base.data;
+	/*
+	 * WARNING: We do not back up req->priv here! The req->priv
+	 *          field is for internal use by the Crypto API and the
+	 *          user must _NOT_ _EVER_ depend on its contents!
+	 */
+
+	req->result = PTR_ALIGN((u8 *)priv->ubuf, alignmask + 1);
+	req->base.complete = cplt;
+	req->base.data = req;
+	req->priv = priv;
+
+	return 0;
+}
+
+static void ahash_restore_req(struct ahash_request *req)
+{
+	struct ahash_request_priv *priv = req->priv;
+
+	/* Restore the original crypto request. */
+	req->result = priv->result;
+	req->base.complete = priv->complete;
+	req->base.data = priv->data;
+	req->priv = NULL;
+
+	/* Free the req->priv.priv from the ADJUSTED request. */
+	kzfree(priv);
+}
+
 static void ahash_op_unaligned_finish(struct ahash_request *req, int err)
 {
 	struct ahash_request_priv *priv = req->priv;
@@ -201,47 +270,37 @@
 		memcpy(priv->result, req->result,
 		       crypto_ahash_digestsize(crypto_ahash_reqtfm(req)));
 
-	kzfree(priv);
+	ahash_restore_req(req);
 }
 
 static void ahash_op_unaligned_done(struct crypto_async_request *req, int err)
 {
 	struct ahash_request *areq = req->data;
-	struct ahash_request_priv *priv = areq->priv;
-	crypto_completion_t complete = priv->complete;
-	void *data = priv->data;
 
+	/*
+	 * Restore the original request, see ahash_op_unaligned() for what
+	 * goes where.
+	 *
+	 * The "struct ahash_request *req" here is in fact the "req.base"
+	 * from the ADJUSTED request set up in ahash_op_unaligned(); since
+	 * it is a pointer to self, it is also the ADJUSTED "req".
+	 */
+
+	/* First copy req->result into req->priv.result */
 	ahash_op_unaligned_finish(areq, err);
 
-	areq->base.complete = complete;
-	areq->base.data = data;
-
-	complete(&areq->base, err);
+	/* Complete the ORIGINAL request. */
+	areq->base.complete(&areq->base, err);
 }
 
 static int ahash_op_unaligned(struct ahash_request *req,
 			      int (*op)(struct ahash_request *))
 {
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	unsigned long alignmask = crypto_ahash_alignmask(tfm);
-	unsigned int ds = crypto_ahash_digestsize(tfm);
-	struct ahash_request_priv *priv;
 	int err;
 
-	priv = kmalloc(sizeof(*priv) + ahash_align_buffer_size(ds, alignmask),
-		       (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
-		       GFP_KERNEL : GFP_ATOMIC);
-	if (!priv)
-		return -ENOMEM;
-
-	priv->result = req->result;
-	priv->complete = req->base.complete;
-	priv->data = req->base.data;
-
-	req->result = PTR_ALIGN((u8 *)priv->ubuf, alignmask + 1);
-	req->base.complete = ahash_op_unaligned_done;
-	req->base.data = req;
-	req->priv = priv;
+	err = ahash_save_req(req, ahash_op_unaligned_done);
+	if (err)
+		return err;
 
 	err = op(req);
 	ahash_op_unaligned_finish(req, err);
@@ -290,19 +349,16 @@
 		memcpy(priv->result, req->result,
 		       crypto_ahash_digestsize(crypto_ahash_reqtfm(req)));
 
-	kzfree(priv);
+	ahash_restore_req(req);
 }
 
 static void ahash_def_finup_done2(struct crypto_async_request *req, int err)
 {
 	struct ahash_request *areq = req->data;
-	struct ahash_request_priv *priv = areq->priv;
-	crypto_completion_t complete = priv->complete;
-	void *data = priv->data;
 
 	ahash_def_finup_finish2(areq, err);
 
-	complete(data, err);
+	areq->base.complete(&areq->base, err);
 }
 
 static int ahash_def_finup_finish1(struct ahash_request *req, int err)
@@ -322,38 +378,23 @@
 static void ahash_def_finup_done1(struct crypto_async_request *req, int err)
 {
 	struct ahash_request *areq = req->data;
-	struct ahash_request_priv *priv = areq->priv;
-	crypto_completion_t complete = priv->complete;
-	void *data = priv->data;
 
 	err = ahash_def_finup_finish1(areq, err);
 
-	complete(data, err);
+	areq->base.complete(&areq->base, err);
 }
 
 static int ahash_def_finup(struct ahash_request *req)
 {
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	unsigned long alignmask = crypto_ahash_alignmask(tfm);
-	unsigned int ds = crypto_ahash_digestsize(tfm);
-	struct ahash_request_priv *priv;
+	int err;
 
-	priv = kmalloc(sizeof(*priv) + ahash_align_buffer_size(ds, alignmask),
-		       (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
-		       GFP_KERNEL : GFP_ATOMIC);
-	if (!priv)
-		return -ENOMEM;
+	err = ahash_save_req(req, ahash_def_finup_done1);
+	if (err)
+		return err;
 
-	priv->result = req->result;
-	priv->complete = req->base.complete;
-	priv->data = req->base.data;
-
-	req->result = PTR_ALIGN((u8 *)priv->ubuf, alignmask + 1);
-	req->base.complete = ahash_def_finup_done1;
-	req->base.data = req;
-	req->priv = priv;
-
-	return ahash_def_finup_finish1(req, tfm->update(req));
+	err = tfm->update(req);
+	return ahash_def_finup_finish1(req, err);
 }
 
 static int ahash_no_export(struct ahash_request *req, void *out)
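The refactor above collapses two copies of the request-mangling code into ahash_save_req()/ahash_restore_req(). Stripped of the crypto-API types, the pattern being factored out is the classic save/trampoline/restore shape. A hypothetical, simplified rendering (not the real kernel types):

	struct saved {
		void	(*complete)(void *data, int err);	/* ORIGINAL completion */
		void	*data;					/* ORIGINAL context    */
		u8	*result;				/* ORIGINAL buffer     */
	};

	static void trampoline_done(void *data, int err)
	{
		struct request *req = data;	/* base.data points at req   */
		struct saved *sv = req->priv;

		memcpy(sv->result, req->result, req->digest_size);
		restore(req, sv);		/* undo the ADJUSTments      */
		req->complete(req->data, err);	/* finish ORIGINAL request   */
	}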
diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c
index a79e7e9..0122bec 100644
--- a/crypto/blkcipher.c
+++ b/crypto/blkcipher.c
@@ -70,14 +70,12 @@
 	return max(start, end_page);
 }
 
-static inline unsigned int blkcipher_done_slow(struct crypto_blkcipher *tfm,
-					       struct blkcipher_walk *walk,
+static inline unsigned int blkcipher_done_slow(struct blkcipher_walk *walk,
 					       unsigned int bsize)
 {
 	u8 *addr;
-	unsigned int alignmask = crypto_blkcipher_alignmask(tfm);
 
-	addr = (u8 *)ALIGN((unsigned long)walk->buffer, alignmask + 1);
+	addr = (u8 *)ALIGN((unsigned long)walk->buffer, walk->alignmask + 1);
 	addr = blkcipher_get_spot(addr, bsize);
 	scatterwalk_copychunks(addr, &walk->out, bsize, 1);
 	return bsize;
@@ -105,7 +103,6 @@
 int blkcipher_walk_done(struct blkcipher_desc *desc,
 			struct blkcipher_walk *walk, int err)
 {
-	struct crypto_blkcipher *tfm = desc->tfm;
 	unsigned int nbytes = 0;
 
 	if (likely(err >= 0)) {
@@ -117,7 +114,7 @@
 			err = -EINVAL;
 			goto err;
 		} else
-			n = blkcipher_done_slow(tfm, walk, n);
+			n = blkcipher_done_slow(walk, n);
 
 		nbytes = walk->total - n;
 		err = 0;
@@ -136,7 +133,7 @@
 	}
 
 	if (walk->iv != desc->info)
-		memcpy(desc->info, walk->iv, crypto_blkcipher_ivsize(tfm));
+		memcpy(desc->info, walk->iv, walk->ivsize);
 	if (walk->buffer != walk->page)
 		kfree(walk->buffer);
 	if (walk->page)
@@ -226,22 +223,20 @@
 static int blkcipher_walk_next(struct blkcipher_desc *desc,
 			       struct blkcipher_walk *walk)
 {
-	struct crypto_blkcipher *tfm = desc->tfm;
-	unsigned int alignmask = crypto_blkcipher_alignmask(tfm);
 	unsigned int bsize;
 	unsigned int n;
 	int err;
 
 	n = walk->total;
-	if (unlikely(n < crypto_blkcipher_blocksize(tfm))) {
+	if (unlikely(n < walk->cipher_blocksize)) {
 		desc->flags |= CRYPTO_TFM_RES_BAD_BLOCK_LEN;
 		return blkcipher_walk_done(desc, walk, -EINVAL);
 	}
 
 	walk->flags &= ~(BLKCIPHER_WALK_SLOW | BLKCIPHER_WALK_COPY |
 			 BLKCIPHER_WALK_DIFF);
-	if (!scatterwalk_aligned(&walk->in, alignmask) ||
-	    !scatterwalk_aligned(&walk->out, alignmask)) {
+	if (!scatterwalk_aligned(&walk->in, walk->alignmask) ||
+	    !scatterwalk_aligned(&walk->out, walk->alignmask)) {
 		walk->flags |= BLKCIPHER_WALK_COPY;
 		if (!walk->page) {
 			walk->page = (void *)__get_free_page(GFP_ATOMIC);
@@ -250,12 +245,12 @@
 		}
 	}
 
-	bsize = min(walk->blocksize, n);
+	bsize = min(walk->walk_blocksize, n);
 	n = scatterwalk_clamp(&walk->in, n);
 	n = scatterwalk_clamp(&walk->out, n);
 
 	if (unlikely(n < bsize)) {
-		err = blkcipher_next_slow(desc, walk, bsize, alignmask);
+		err = blkcipher_next_slow(desc, walk, bsize, walk->alignmask);
 		goto set_phys_lowmem;
 	}
 
@@ -277,28 +272,26 @@
 	return err;
 }
 
-static inline int blkcipher_copy_iv(struct blkcipher_walk *walk,
-				    struct crypto_blkcipher *tfm,
-				    unsigned int alignmask)
+static inline int blkcipher_copy_iv(struct blkcipher_walk *walk)
 {
-	unsigned bs = walk->blocksize;
-	unsigned int ivsize = crypto_blkcipher_ivsize(tfm);
-	unsigned aligned_bs = ALIGN(bs, alignmask + 1);
-	unsigned int size = aligned_bs * 2 + ivsize + max(aligned_bs, ivsize) -
-			    (alignmask + 1);
+	unsigned bs = walk->walk_blocksize;
+	unsigned aligned_bs = ALIGN(bs, walk->alignmask + 1);
+	unsigned int size = aligned_bs * 2 +
+			    walk->ivsize + max(aligned_bs, walk->ivsize) -
+			    (walk->alignmask + 1);
 	u8 *iv;
 
-	size += alignmask & ~(crypto_tfm_ctx_alignment() - 1);
+	size += walk->alignmask & ~(crypto_tfm_ctx_alignment() - 1);
 	walk->buffer = kmalloc(size, GFP_ATOMIC);
 	if (!walk->buffer)
 		return -ENOMEM;
 
-	iv = (u8 *)ALIGN((unsigned long)walk->buffer, alignmask + 1);
+	iv = (u8 *)ALIGN((unsigned long)walk->buffer, walk->alignmask + 1);
 	iv = blkcipher_get_spot(iv, bs) + aligned_bs;
 	iv = blkcipher_get_spot(iv, bs) + aligned_bs;
-	iv = blkcipher_get_spot(iv, ivsize);
+	iv = blkcipher_get_spot(iv, walk->ivsize);
 
-	walk->iv = memcpy(iv, walk->iv, ivsize);
+	walk->iv = memcpy(iv, walk->iv, walk->ivsize);
 	return 0;
 }
 
@@ -306,7 +299,10 @@
 			struct blkcipher_walk *walk)
 {
 	walk->flags &= ~BLKCIPHER_WALK_PHYS;
-	walk->blocksize = crypto_blkcipher_blocksize(desc->tfm);
+	walk->walk_blocksize = crypto_blkcipher_blocksize(desc->tfm);
+	walk->cipher_blocksize = walk->walk_blocksize;
+	walk->ivsize = crypto_blkcipher_ivsize(desc->tfm);
+	walk->alignmask = crypto_blkcipher_alignmask(desc->tfm);
 	return blkcipher_walk_first(desc, walk);
 }
 EXPORT_SYMBOL_GPL(blkcipher_walk_virt);
@@ -315,7 +311,10 @@
 			struct blkcipher_walk *walk)
 {
 	walk->flags |= BLKCIPHER_WALK_PHYS;
-	walk->blocksize = crypto_blkcipher_blocksize(desc->tfm);
+	walk->walk_blocksize = crypto_blkcipher_blocksize(desc->tfm);
+	walk->cipher_blocksize = walk->walk_blocksize;
+	walk->ivsize = crypto_blkcipher_ivsize(desc->tfm);
+	walk->alignmask = crypto_blkcipher_alignmask(desc->tfm);
 	return blkcipher_walk_first(desc, walk);
 }
 EXPORT_SYMBOL_GPL(blkcipher_walk_phys);
@@ -323,9 +322,6 @@
 static int blkcipher_walk_first(struct blkcipher_desc *desc,
 				struct blkcipher_walk *walk)
 {
-	struct crypto_blkcipher *tfm = desc->tfm;
-	unsigned int alignmask = crypto_blkcipher_alignmask(tfm);
-
 	if (WARN_ON_ONCE(in_irq()))
 		return -EDEADLK;
 
@@ -335,8 +331,8 @@
 
 	walk->buffer = NULL;
 	walk->iv = desc->info;
-	if (unlikely(((unsigned long)walk->iv & alignmask))) {
-		int err = blkcipher_copy_iv(walk, tfm, alignmask);
+	if (unlikely(((unsigned long)walk->iv & walk->alignmask))) {
+		int err = blkcipher_copy_iv(walk);
 		if (err)
 			return err;
 	}
@@ -353,11 +349,28 @@
 			      unsigned int blocksize)
 {
 	walk->flags &= ~BLKCIPHER_WALK_PHYS;
-	walk->blocksize = blocksize;
+	walk->walk_blocksize = blocksize;
+	walk->cipher_blocksize = crypto_blkcipher_blocksize(desc->tfm);
+	walk->ivsize = crypto_blkcipher_ivsize(desc->tfm);
+	walk->alignmask = crypto_blkcipher_alignmask(desc->tfm);
 	return blkcipher_walk_first(desc, walk);
 }
 EXPORT_SYMBOL_GPL(blkcipher_walk_virt_block);
 
+int blkcipher_aead_walk_virt_block(struct blkcipher_desc *desc,
+				   struct blkcipher_walk *walk,
+				   struct crypto_aead *tfm,
+				   unsigned int blocksize)
+{
+	walk->flags &= ~BLKCIPHER_WALK_PHYS;
+	walk->walk_blocksize = blocksize;
+	walk->cipher_blocksize = crypto_aead_blocksize(tfm);
+	walk->ivsize = crypto_aead_ivsize(tfm);
+	walk->alignmask = crypto_aead_alignmask(tfm);
+	return blkcipher_walk_first(desc, walk);
+}
+EXPORT_SYMBOL_GPL(blkcipher_aead_walk_virt_block);
+
 static int setkey_unaligned(struct crypto_tfm *tfm, const u8 *key,
 			    unsigned int keylen)
 {
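The new blkcipher_aead_walk_virt_block() is the exported piece of the "allow blkcipher walks over AEAD data" change: with blocksize, ivsize and alignmask now cached in the walk itself, the walker no longer dereferences a blkcipher transform, so an AEAD implementation (the ARM bit-sliced AES code is the intended user) can drive it. A hedged usage sketch; the caller and its setup are hypothetical:

	struct blkcipher_desc desc = { .info = req->iv };
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst_sg, src_sg, nbytes);
	err = blkcipher_aead_walk_virt_block(&desc, &walk, aead_tfm,
					     AES_BLOCK_SIZE);
	while (walk.nbytes) {
		/* process walk.src.virt.addr -> walk.dst.virt.addr here */
		err = blkcipher_walk_done(&desc, &walk,
					  walk.nbytes % AES_BLOCK_SIZE);
	}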
diff --git a/crypto/crc32c.c b/crypto/crc32c_generic.c
similarity index 98%
rename from crypto/crc32c.c
rename to crypto/crc32c_generic.c
index 06f7018..d9c7beb 100644
--- a/crypto/crc32c.c
+++ b/crypto/crc32c_generic.c
@@ -170,3 +170,5 @@
 MODULE_AUTHOR("Clay Haapala <chaapala@cisco.com>");
 MODULE_DESCRIPTION("CRC32c (Castagnoli) calculations wrapper for lib/crc32c");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("crc32c");
+MODULE_SOFTDEP("pre: crc32c");
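With the rename, the generic implementation now lives in crc32c_generic.ko; the alias keeps existing "crc32c" module requests working, while the softdep tells initramfs tools that follow soft dependencies to also pack whatever module provides "crc32c" (e.g. an accelerated driver). Callers are unchanged; a filesystem still requests the algorithm by name:

	struct crypto_shash *tfm = crypto_alloc_shash("crc32c", 0, 0);

	if (IS_ERR(tfm))
		return PTR_ERR(tfm);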
diff --git a/crypto/crypto_null.c b/crypto/crypto_null.c
index fee7265..1dc54bb 100644
--- a/crypto/crypto_null.c
+++ b/crypto/crypto_null.c
@@ -17,6 +17,7 @@
  *
  */
 
+#include <crypto/null.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/skcipher.h>
 #include <linux/init.h>
@@ -24,11 +25,6 @@
 #include <linux/mm.h>
 #include <linux/string.h>
 
-#define NULL_KEY_SIZE		0
-#define NULL_BLOCK_SIZE		1
-#define NULL_DIGEST_SIZE	0
-#define NULL_IV_SIZE		0
-
 static int null_compress(struct crypto_tfm *tfm, const u8 *src,
 			 unsigned int slen, u8 *dst, unsigned int *dlen)
 {
diff --git a/crypto/crypto_wq.c b/crypto/crypto_wq.c
index adad92a..2f1b8d1 100644
--- a/crypto/crypto_wq.c
+++ b/crypto/crypto_wq.c
@@ -33,7 +33,7 @@
 	destroy_workqueue(kcrypto_wq);
 }
 
-module_init(crypto_wq_init);
+subsys_initcall(crypto_wq_init);
 module_exit(crypto_wq_exit);
 
 MODULE_LICENSE("GPL");
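The late-initialization fix works because initcall levels order built-in code: module_init() collapses to device_initcall() (level 6) when built in, while subsys_initcall() runs at level 4, so kcrypto_wq now exists before any built-in crypto user can queue work on it. A sketch of the ordering (the user shown is hypothetical):

	subsys_initcall(crypto_wq_init);    /* level 4: creates kcrypto_wq        */
	module_init(crypto_user_init);      /* level 6 when built in: runs later  */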
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 0d9003a..870be7b 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -1511,6 +1511,14 @@
 		ret += tcrypt_test("authenc(hmac(sha1),cbc(aes))");
 		break;
 
+	case 156:
+		ret += tcrypt_test("authenc(hmac(md5),ecb(cipher_null))");
+		break;
+
+	case 157:
+		ret += tcrypt_test("authenc(hmac(sha1),ecb(cipher_null))");
+		break;
+
 	case 200:
 		test_cipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0,
 				speed_template_16_24_32);
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 7795550..dc3cf35 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -1809,6 +1809,22 @@
 			}
 		}
 	}, {
+		.alg = "authenc(hmac(md5),ecb(cipher_null))",
+		.test = alg_test_aead,
+		.fips_allowed = 1,
+		.suite = {
+			.aead = {
+				.enc = {
+					.vecs = hmac_md5_ecb_cipher_null_enc_tv_template,
+					.count = HMAC_MD5_ECB_CIPHER_NULL_ENC_TEST_VECTORS
+				},
+				.dec = {
+					.vecs = hmac_md5_ecb_cipher_null_dec_tv_template,
+					.count = HMAC_MD5_ECB_CIPHER_NULL_DEC_TEST_VECTORS
+				}
+			}
+		}
+	}, {
 		.alg = "authenc(hmac(sha1),cbc(aes))",
 		.test = alg_test_aead,
 		.fips_allowed = 1,
@@ -1821,6 +1837,22 @@
 			}
 		}
 	}, {
+		.alg = "authenc(hmac(sha1),ecb(cipher_null))",
+		.test = alg_test_aead,
+		.fips_allowed = 1,
+		.suite = {
+			.aead = {
+				.enc = {
+					.vecs = hmac_sha1_ecb_cipher_null_enc_tv_template,
+					.count = HMAC_SHA1_ECB_CIPHER_NULL_ENC_TEST_VECTORS
+				},
+				.dec = {
+					.vecs = hmac_sha1_ecb_cipher_null_dec_tv_template,
+					.count = HMAC_SHA1_ECB_CIPHER_NULL_DEC_TEST_VECTORS
+				}
+			}
+		}
+	}, {
 		.alg = "authenc(hmac(sha256),cbc(aes))",
 		.test = alg_test_aead,
 		.fips_allowed = 1,
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 7d44aa3..3db83dbb 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -12821,6 +12821,10 @@
 #define AES_DEC_TEST_VECTORS 4
 #define AES_CBC_ENC_TEST_VECTORS 5
 #define AES_CBC_DEC_TEST_VECTORS 5
+#define HMAC_MD5_ECB_CIPHER_NULL_ENC_TEST_VECTORS 2
+#define HMAC_MD5_ECB_CIPHER_NULL_DEC_TEST_VECTORS 2
+#define HMAC_SHA1_ECB_CIPHER_NULL_ENC_TEST_VECTORS 2
+#define HMAC_SHA1_ECB_CIPHER_NULL_DEC_TEST_VECTORS 2
 #define HMAC_SHA1_AES_CBC_ENC_TEST_VECTORS 7
 #define HMAC_SHA256_AES_CBC_ENC_TEST_VECTORS 7
 #define HMAC_SHA512_AES_CBC_ENC_TEST_VECTORS 7
@@ -13627,6 +13631,90 @@
 	},
 };
 
+static struct aead_testvec hmac_md5_ecb_cipher_null_enc_tv_template[] = {
+	{ /* Input data from RFC 2410 Case 1 */
+#ifdef __LITTLE_ENDIAN
+		.key    = "\x08\x00"		/* rta length */
+			  "\x01\x00"		/* rta type */
+#else
+		.key    = "\x00\x08"		/* rta length */
+			  "\x00\x01"		/* rta type */
+#endif
+			  "\x00\x00\x00\x00"	/* enc key length */
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.klen   = 8 + 16 + 0,
+		.iv     = "",
+		.input  = "\x01\x23\x45\x67\x89\xab\xcd\xef",
+		.ilen   = 8,
+		.result = "\x01\x23\x45\x67\x89\xab\xcd\xef"
+			  "\xaa\x42\xfe\x43\x8d\xea\xa3\x5a"
+			  "\xb9\x3d\x9f\xb1\xa3\x8e\x9b\xae",
+		.rlen   = 8 + 16,
+	}, { /* Input data from RFC 2410 Case 2 */
+#ifdef __LITTLE_ENDIAN
+		.key    = "\x08\x00"		/* rta length */
+			  "\x01\x00"		/* rta type */
+#else
+		.key    = "\x00\x08"		/* rta length */
+			  "\x00\x01"		/* rta type */
+#endif
+			  "\x00\x00\x00\x00"	/* enc key length */
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.klen   = 8 + 16 + 0,
+		.iv     = "",
+		.input  = "Network Security People Have A Strange Sense Of Humor",
+		.ilen   = 53,
+		.result = "Network Security People Have A Strange Sense Of Humor"
+			  "\x73\xa5\x3e\x1c\x08\x0e\x8a\x8a"
+			  "\x8e\xb5\x5f\x90\x8e\xfe\x13\x23",
+		.rlen   = 53 + 16,
+	},
+};
+
+static struct aead_testvec hmac_md5_ecb_cipher_null_dec_tv_template[] = {
+	{
+#ifdef __LITTLE_ENDIAN
+		.key    = "\x08\x00"		/* rta length */
+			  "\x01\x00"		/* rta type */
+#else
+		.key    = "\x00\x08"		/* rta length */
+			  "\x00\x01"		/* rta type */
+#endif
+			  "\x00\x00\x00\x00"	/* enc key length */
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.klen   = 8 + 16 + 0,
+		.iv     = "",
+		.input  = "\x01\x23\x45\x67\x89\xab\xcd\xef"
+			  "\xaa\x42\xfe\x43\x8d\xea\xa3\x5a"
+			  "\xb9\x3d\x9f\xb1\xa3\x8e\x9b\xae",
+		.ilen   = 8 + 16,
+		.result = "\x01\x23\x45\x67\x89\xab\xcd\xef",
+		.rlen   = 8,
+	}, {
+#ifdef __LITTLE_ENDIAN
+		.key    = "\x08\x00"		/* rta length */
+			  "\x01\x00"		/* rta type */
+#else
+		.key    = "\x00\x08"		/* rta length */
+			  "\x00\x01"		/* rta type */
+#endif
+			  "\x00\x00\x00\x00"	/* enc key length */
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.klen   = 8 + 16 + 0,
+		.iv     = "",
+		.input  = "Network Security People Have A Strange Sense Of Humor"
+			  "\x73\xa5\x3e\x1c\x08\x0e\x8a\x8a"
+			  "\x8e\xb5\x5f\x90\x8e\xfe\x13\x23",
+		.ilen   = 53 + 16,
+		.result = "Network Security People Have A Strange Sense Of Humor",
+		.rlen   = 53,
+	},
+};
+
 static struct aead_testvec hmac_sha1_aes_cbc_enc_tv_template[] = {
 	{ /* RFC 3602 Case 1 */
 #ifdef __LITTLE_ENDIAN
@@ -13876,6 +13964,98 @@
 	},
 };
 
+static struct aead_testvec hmac_sha1_ecb_cipher_null_enc_tv_template[] = {
+	{ /* Input data from RFC 2410 Case 1 */
+#ifdef __LITTLE_ENDIAN
+		.key    = "\x08\x00"		/* rta length */
+			  "\x01\x00"		/* rta type */
+#else
+		.key    = "\x00\x08"		/* rta length */
+			  "\x00\x01"		/* rta type */
+#endif
+			  "\x00\x00\x00\x00"	/* enc key length */
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00",
+		.klen   = 8 + 20 + 0,
+		.iv     = "",
+		.input  = "\x01\x23\x45\x67\x89\xab\xcd\xef",
+		.ilen   = 8,
+		.result = "\x01\x23\x45\x67\x89\xab\xcd\xef"
+			  "\x40\xc3\x0a\xa1\xc9\xa0\x28\xab"
+			  "\x99\x5e\x19\x04\xd1\x72\xef\xb8"
+			  "\x8c\x5e\xe4\x08",
+		.rlen   = 8 + 20,
+	}, { /* Input data from RFC 2410 Case 2 */
+#ifdef __LITTLE_ENDIAN
+		.key    = "\x08\x00"		/* rta length */
+			  "\x01\x00"		/* rta type */
+#else
+		.key    = "\x00\x08"		/* rta length */
+			  "\x00\x01"		/* rta type */
+#endif
+			  "\x00\x00\x00\x00"	/* enc key length */
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00",
+		.klen   = 8 + 20 + 0,
+		.iv     = "",
+		.input  = "Network Security People Have A Strange Sense Of Humor",
+		.ilen   = 53,
+		.result = "Network Security People Have A Strange Sense Of Humor"
+			  "\x75\x6f\x42\x1e\xf8\x50\x21\xd2"
+			  "\x65\x47\xee\x8e\x1a\xef\x16\xf6"
+			  "\x91\x56\xe4\xd6",
+		.rlen   = 53 + 20,
+	},
+};
+
+static struct aead_testvec hmac_sha1_ecb_cipher_null_dec_tv_template[] = {
+	{
+#ifdef __LITTLE_ENDIAN
+		.key    = "\x08\x00"		/* rta length */
+			  "\x01\x00"		/* rta type */
+#else
+		.key    = "\x00\x08"		/* rta length */
+			  "\x00\x01"		/* rta type */
+#endif
+			  "\x00\x00\x00\x00"	/* enc key length */
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00",
+		.klen   = 8 + 20 + 0,
+		.iv     = "",
+		.input  = "\x01\x23\x45\x67\x89\xab\xcd\xef"
+			  "\x40\xc3\x0a\xa1\xc9\xa0\x28\xab"
+			  "\x99\x5e\x19\x04\xd1\x72\xef\xb8"
+			  "\x8c\x5e\xe4\x08",
+		.ilen   = 8 + 20,
+		.result = "\x01\x23\x45\x67\x89\xab\xcd\xef",
+		.rlen   = 8,
+	}, {
+#ifdef __LITTLE_ENDIAN
+		.key    = "\x08\x00"		/* rta length */
+			  "\x01\x00"		/* rta type */
+#else
+		.key    = "\x00\x08"		/* rta length */
+			  "\x00\x01"		/* rta type */
+#endif
+			  "\x00\x00\x00\x00"	/* enc key length */
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00",
+		.klen   = 8 + 20 + 0,
+		.iv     = "",
+		.input  = "Network Security People Have A Strange Sense Of Humor"
+			  "\x75\x6f\x42\x1e\xf8\x50\x21\xd2"
+			  "\x65\x47\xee\x8e\x1a\xef\x16\xf6"
+			  "\x91\x56\xe4\xd6",
+		.ilen   = 53 + 20,
+		.result = "Network Security People Have A Strange Sense Of Humor",
+		.rlen   = 53,
+	},
+};
+
 static struct aead_testvec hmac_sha256_aes_cbc_enc_tv_template[] = {
 	{ /* RFC 3602 Case 1 */
 #ifdef __LITTLE_ENDIAN
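The key blobs in these vectors follow the authenc() key format: an rtattr header carrying the encryption-key length, followed by authkey || enckey, which is why klen is written as 8 + <digest keylen> + 0 for ecb(cipher_null). A sketch of how such a blob is built (see include/crypto/authenc.h; keybuf is hypothetical):

	struct rtattr *rta = (struct rtattr *)keybuf;
	struct crypto_authenc_key_param *param;

	rta->rta_len  = RTA_LENGTH(sizeof(*param));	/* 8, the "\x08\x00" */
	rta->rta_type = CRYPTO_AUTHENC_KEYA_PARAM;	/* 1, the "\x01\x00" */
	param = RTA_DATA(rta);
	param->enckeylen = cpu_to_be32(0);	/* cipher_null: no enc key */
	/* followed by the raw HMAC key (16 bytes for MD5, 20 for SHA-1) */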
diff --git a/drivers/char/hw_random/atmel-rng.c b/drivers/char/hw_random/atmel-rng.c
index bf9fc6b..851bc7e 100644
--- a/drivers/char/hw_random/atmel-rng.c
+++ b/drivers/char/hw_random/atmel-rng.c
@@ -54,29 +54,22 @@
 	struct resource *res;
 	int ret;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res)
-		return -EINVAL;
-
 	trng = devm_kzalloc(&pdev->dev, sizeof(*trng), GFP_KERNEL);
 	if (!trng)
 		return -ENOMEM;
 
-	if (!devm_request_mem_region(&pdev->dev, res->start,
-				     resource_size(res), pdev->name))
-		return -EBUSY;
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	trng->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(trng->base))
+		return PTR_ERR(trng->base);
 
-	trng->base = devm_ioremap(&pdev->dev, res->start, resource_size(res));
-	if (!trng->base)
-		return -EBUSY;
-
-	trng->clk = clk_get(&pdev->dev, NULL);
+	trng->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(trng->clk))
 		return PTR_ERR(trng->clk);
 
 	ret = clk_enable(trng->clk);
 	if (ret)
-		goto err_enable;
+		return ret;
 
 	writel(TRNG_KEY | 1, trng->base + TRNG_CR);
 	trng->rng.name = pdev->name;
@@ -92,9 +85,6 @@
 
 err_register:
 	clk_disable(trng->clk);
-err_enable:
-	clk_put(trng->clk);
-
 	return ret;
 }
 
@@ -106,7 +96,6 @@
 
 	writel(TRNG_KEY, trng->base + TRNG_CR);
 	clk_disable(trng->clk);
-	clk_put(trng->clk);
 
 	return 0;
 }
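This and the following hw_random conversions all follow the same managed-resource recipe: devm_ioremap_resource() and devm_clk_get() tie the mapping and clock to the device's lifetime, so the error-unwind labels and the iounmap()/clk_put() calls in remove() can simply be deleted. The skeleton, for a hypothetical driver:

	static int foo_rng_probe(struct platform_device *pdev)
	{
		struct resource *res;
		void __iomem *base;
		struct clk *clk;

		res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
		base = devm_ioremap_resource(&pdev->dev, res);	/* NULL res handled */
		if (IS_ERR(base))
			return PTR_ERR(base);

		clk = devm_clk_get(&pdev->dev, NULL);
		if (IS_ERR(clk))
			return PTR_ERR(clk);

		/* ... register the hwrng; nothing to unwind manually ... */
		return 0;
	}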
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index b9495a8..334601c 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -40,6 +40,7 @@
 #include <linux/miscdevice.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
+#include <linux/random.h>
 #include <asm/uaccess.h>
 
 
@@ -301,9 +302,10 @@
 
 int hwrng_register(struct hwrng *rng)
 {
-	int must_register_misc;
 	int err = -EINVAL;
 	struct hwrng *old_rng, *tmp;
+	unsigned char bytes[16];
+	int bytes_read;
 
 	if (rng->name == NULL ||
 	    (rng->data_read == NULL && rng->read == NULL))
@@ -326,7 +328,6 @@
 			goto out_unlock;
 	}
 
-	must_register_misc = (current_rng == NULL);
 	old_rng = current_rng;
 	if (!old_rng) {
 		err = hwrng_init(rng);
@@ -335,18 +336,20 @@
 		current_rng = rng;
 	}
 	err = 0;
-	if (must_register_misc) {
+	if (!old_rng) {
 		err = register_miscdev();
 		if (err) {
-			if (!old_rng) {
-				hwrng_cleanup(rng);
-				current_rng = NULL;
-			}
+			hwrng_cleanup(rng);
+			current_rng = NULL;
 			goto out_unlock;
 		}
 	}
 	INIT_LIST_HEAD(&rng->list);
 	list_add_tail(&rng->list, &rng_list);
+
+	bytes_read = rng_get_data(rng, bytes, sizeof(bytes), 1);
+	if (bytes_read > 0)
+		add_device_randomness(bytes, bytes_read);
 out_unlock:
 	mutex_unlock(&rng_mutex);
 out:
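This is the "(zero entropy) randomness" item from the changelog: add_device_randomness() mixes the sampled bytes into the input pool without crediting any entropy, so an untrusted RNG cannot inflate the entropy estimate but still perturbs the pool state at registration time. The same call (from drivers/char/random.c) suits any device-specific data available at init; illustratively:

	/* seed the pool with device-unique data, crediting no entropy;
	 * serial_number is a hypothetical per-device identifier */
	add_device_randomness(serial_number, sizeof(serial_number));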
diff --git a/drivers/char/hw_random/nomadik-rng.c b/drivers/char/hw_random/nomadik-rng.c
index 00e9d2d..9c85815 100644
--- a/drivers/char/hw_random/nomadik-rng.c
+++ b/drivers/char/hw_random/nomadik-rng.c
@@ -43,7 +43,7 @@
 	void __iomem *base;
 	int ret;
 
-	rng_clk = clk_get(&dev->dev, NULL);
+	rng_clk = devm_clk_get(&dev->dev, NULL);
 	if (IS_ERR(rng_clk)) {
 		dev_err(&dev->dev, "could not get rng clock\n");
 		ret = PTR_ERR(rng_clk);
@@ -56,33 +56,28 @@
 	if (ret)
 		goto out_clk;
 	ret = -ENOMEM;
-	base = ioremap(dev->res.start, resource_size(&dev->res));
+	base = devm_ioremap(&dev->dev, dev->res.start,
+			    resource_size(&dev->res));
 	if (!base)
 		goto out_release;
 	nmk_rng.priv = (unsigned long)base;
 	ret = hwrng_register(&nmk_rng);
 	if (ret)
-		goto out_unmap;
+		goto out_release;
 	return 0;
 
-out_unmap:
-	iounmap(base);
 out_release:
 	amba_release_regions(dev);
 out_clk:
 	clk_disable(rng_clk);
-	clk_put(rng_clk);
 	return ret;
 }
 
 static int nmk_rng_remove(struct amba_device *dev)
 {
-	void __iomem *base = (void __iomem *)nmk_rng.priv;
 	hwrng_unregister(&nmk_rng);
-	iounmap(base);
 	amba_release_regions(dev);
 	clk_disable(rng_clk);
-	clk_put(rng_clk);
 	return 0;
 }
 
diff --git a/drivers/char/hw_random/omap3-rom-rng.c b/drivers/char/hw_random/omap3-rom-rng.c
index c853e9e..6f2eaff 100644
--- a/drivers/char/hw_random/omap3-rom-rng.c
+++ b/drivers/char/hw_random/omap3-rom-rng.c
@@ -103,7 +103,7 @@
 	}
 
 	setup_timer(&idle_timer, omap3_rom_rng_idle, 0);
-	rng_clk = clk_get(&pdev->dev, "ick");
+	rng_clk = devm_clk_get(&pdev->dev, "ick");
 	if (IS_ERR(rng_clk)) {
 		pr_err("unable to get RNG clock\n");
 		return PTR_ERR(rng_clk);
@@ -120,7 +120,6 @@
 {
 	hwrng_unregister(&omap3_rom_rng_ops);
 	clk_disable_unprepare(rng_clk);
-	clk_put(rng_clk);
 	return 0;
 }
 
diff --git a/drivers/char/hw_random/picoxcell-rng.c b/drivers/char/hw_random/picoxcell-rng.c
index 3d4c229..eab5448 100644
--- a/drivers/char/hw_random/picoxcell-rng.c
+++ b/drivers/char/hw_random/picoxcell-rng.c
@@ -104,24 +104,11 @@
 	int ret;
 	struct resource *mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 
-	if (!mem) {
-		dev_warn(&pdev->dev, "no memory resource\n");
-		return -ENOMEM;
-	}
+	rng_base = devm_ioremap_resource(&pdev->dev, mem);
+	if (IS_ERR(rng_base))
+		return PTR_ERR(rng_base);
 
-	if (!devm_request_mem_region(&pdev->dev, mem->start, resource_size(mem),
-				     "picoxcell_trng")) {
-		dev_warn(&pdev->dev, "unable to request io mem\n");
-		return -EBUSY;
-	}
-
-	rng_base = devm_ioremap(&pdev->dev, mem->start, resource_size(mem));
-	if (!rng_base) {
-		dev_warn(&pdev->dev, "unable to remap io mem\n");
-		return -ENOMEM;
-	}
-
-	rng_clk = clk_get(&pdev->dev, NULL);
+	rng_clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(rng_clk)) {
 		dev_warn(&pdev->dev, "no clk\n");
 		return PTR_ERR(rng_clk);
@@ -130,7 +117,7 @@
 	ret = clk_enable(rng_clk);
 	if (ret) {
 		dev_warn(&pdev->dev, "unable to enable clk\n");
-		goto err_enable;
+		return ret;
 	}
 
 	picoxcell_trng_start();
@@ -145,9 +132,6 @@
 
 err_register:
 	clk_disable(rng_clk);
-err_enable:
-	clk_put(rng_clk);
-
 	return ret;
 }
 
@@ -155,7 +139,6 @@
 {
 	hwrng_unregister(&picoxcell_trng);
 	clk_disable(rng_clk);
-	clk_put(rng_clk);
 
 	return 0;
 }
diff --git a/drivers/char/hw_random/timeriomem-rng.c b/drivers/char/hw_random/timeriomem-rng.c
index 73ce739..439ff8b 100644
--- a/drivers/char/hw_random/timeriomem-rng.c
+++ b/drivers/char/hw_random/timeriomem-rng.c
@@ -118,7 +118,8 @@
 	}
 
 	/* Allocate memory for the device structure (and zero it) */
-	priv = kzalloc(sizeof(struct timeriomem_rng_private_data), GFP_KERNEL);
+	priv = devm_kzalloc(&pdev->dev,
+			sizeof(struct timeriomem_rng_private_data), GFP_KERNEL);
 	if (!priv) {
 		dev_err(&pdev->dev, "failed to allocate device structure.\n");
 		return -ENOMEM;
@@ -134,17 +135,16 @@
 			period = i;
 		else {
 			dev_err(&pdev->dev, "missing period\n");
-			err = -EINVAL;
-			goto out_free;
+			return -EINVAL;
 		}
-	} else
+	} else {
 		period = pdata->period;
+	}
 
 	priv->period = usecs_to_jiffies(period);
 	if (priv->period < 1) {
 		dev_err(&pdev->dev, "period is less than one jiffy\n");
-		err = -EINVAL;
-		goto out_free;
+		return -EINVAL;
 	}
 
 	priv->expires	= jiffies;
@@ -160,24 +160,16 @@
 	priv->timeriomem_rng_ops.data_read	= timeriomem_rng_data_read;
 	priv->timeriomem_rng_ops.priv		= (unsigned long)priv;
 
-	if (!request_mem_region(res->start, resource_size(res),
-				dev_name(&pdev->dev))) {
-		dev_err(&pdev->dev, "request_mem_region failed\n");
-		err = -EBUSY;
+	priv->io_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(priv->io_base)) {
+		err = PTR_ERR(priv->io_base);
 		goto out_timer;
 	}
 
-	priv->io_base = ioremap(res->start, resource_size(res));
-	if (priv->io_base == NULL) {
-		dev_err(&pdev->dev, "ioremap failed\n");
-		err = -EIO;
-		goto out_release_io;
-	}
-
 	err = hwrng_register(&priv->timeriomem_rng_ops);
 	if (err) {
 		dev_err(&pdev->dev, "problem registering\n");
-		goto out;
+		goto out_timer;
 	}
 
 	dev_info(&pdev->dev, "32bits from 0x%p @ %dus\n",
@@ -185,30 +177,18 @@
 
 	return 0;
 
-out:
-	iounmap(priv->io_base);
-out_release_io:
-	release_mem_region(res->start, resource_size(res));
 out_timer:
 	del_timer_sync(&priv->timer);
-out_free:
-	kfree(priv);
 	return err;
 }
 
 static int timeriomem_rng_remove(struct platform_device *pdev)
 {
 	struct timeriomem_rng_private_data *priv = platform_get_drvdata(pdev);
-	struct resource *res;
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 
 	hwrng_unregister(&priv->timeriomem_rng_ops);
 
 	del_timer_sync(&priv->timer);
-	iounmap(priv->io_base);
-	release_mem_region(res->start, resource_size(res));
-	kfree(priv);
 
 	return 0;
 }
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 13857f5..03ccdb0 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -262,6 +262,17 @@
 	  OMAP processors have AES module accelerator. Select this if you
 	  want to use the OMAP module for AES algorithms.
 
+config CRYPTO_DEV_OMAP_DES
+	tristate "Support for OMAP DES/3DES hw engine"
+	depends on ARCH_OMAP2PLUS
+	select CRYPTO_DES
+	select CRYPTO_BLKCIPHER2
+	help
+	  OMAP processors have a DES/3DES module accelerator. Select this if
+	  you want to use the OMAP module for DES and 3DES algorithms. The
+	  driver currently supports the ECB and CBC modes of operation;
+	  accesses made on unaligned boundaries are also supported.
+
 config CRYPTO_DEV_PICOXCELL
 	tristate "Support for picoXcell IPSEC and Layer2 crypto engines"
 	depends on ARCH_PICOXCELL && HAVE_CLK
@@ -300,17 +311,6 @@
 	  Select this to offload Samsung S5PV210 or S5PC110 from AES
 	  algorithms execution.
 
-config CRYPTO_DEV_TEGRA_AES
-	tristate "Support for TEGRA AES hw engine"
-	depends on ARCH_TEGRA
-	select CRYPTO_AES
-	help
-	  TEGRA processors have AES module accelerator. Select this if you
-	  want to use the TEGRA module for AES algorithms.
-
-	  To compile this driver as a module, choose M here: the module
-	  will be called tegra-aes.
-
 config CRYPTO_DEV_NX
 	bool "Support for IBM Power7+ in-Nest cryptographic acceleration"
 	depends on PPC64 && IBMVIO
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index 0bc6aa0..482f090 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -13,6 +13,7 @@
 n2_crypto-y := n2_core.o n2_asm.o
 obj-$(CONFIG_CRYPTO_DEV_NX) += nx/
 obj-$(CONFIG_CRYPTO_DEV_OMAP_AES) += omap-aes.o
+obj-$(CONFIG_CRYPTO_DEV_OMAP_DES) += omap-des.o
 obj-$(CONFIG_CRYPTO_DEV_OMAP_SHAM) += omap-sham.o
 obj-$(CONFIG_CRYPTO_DEV_PADLOCK_AES) += padlock-aes.o
 obj-$(CONFIG_CRYPTO_DEV_PADLOCK_SHA) += padlock-sha.o
@@ -21,5 +22,4 @@
 obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o
 obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o
 obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o
-obj-$(CONFIG_CRYPTO_DEV_TEGRA_AES) += tegra-aes.o
 obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
diff --git a/drivers/crypto/bfin_crc.c b/drivers/crypto/bfin_crc.c
index d797f31..c9ff298 100644
--- a/drivers/crypto/bfin_crc.c
+++ b/drivers/crypto/bfin_crc.c
@@ -139,7 +139,6 @@
 	/* setup CRC interrupts */
 	crc->regs->status = CMPERRI | DCNTEXPI;
 	crc->regs->intrenset = CMPERRI | DCNTEXPI;
-	SSYNC();
 
 	return 0;
 }
@@ -285,17 +284,12 @@
 	if (i == 0)
 		return;
 
-	flush_dcache_range((unsigned int)crc->sg_cpu,
-			(unsigned int)crc->sg_cpu +
-			i * sizeof(struct dma_desc_array));
-
 	/* Set the last descriptor to stop mode */
 	crc->sg_cpu[i - 1].cfg &= ~(DMAFLOW | NDSIZE);
 	crc->sg_cpu[i - 1].cfg |= DI_EN;
 	set_dma_curr_desc_addr(crc->dma_ch, (unsigned long *)crc->sg_dma);
 	set_dma_x_count(crc->dma_ch, 0);
 	set_dma_x_modify(crc->dma_ch, 0);
-	SSYNC();
 	set_dma_config(crc->dma_ch, dma_config);
 }
 
@@ -415,7 +409,6 @@
 
 	/* finally kick off CRC operation */
 	crc->regs->control |= BLKEN;
-	SSYNC();
 
 	return -EINPROGRESS;
 }
@@ -539,7 +532,6 @@
 
 	if (crc->regs->status & DCNTEXP) {
 		crc->regs->status = DCNTEXP;
-		SSYNC();
 
 		/* prepare results */
 		put_unaligned_le32(crc->regs->result, crc->req->result);
@@ -594,7 +586,7 @@
 	unsigned int timeout = 100000;
 	int ret;
 
-	crc = kzalloc(sizeof(*crc), GFP_KERNEL);
+	crc = devm_kzalloc(dev, sizeof(*crc), GFP_KERNEL);
 	if (!crc) {
 		dev_err(&pdev->dev, "fail to malloc bfin_crypto_crc\n");
 		return -ENOMEM;
@@ -610,42 +602,39 @@
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (res == NULL) {
 		dev_err(&pdev->dev, "Cannot get IORESOURCE_MEM\n");
-		ret = -ENOENT;
-		goto out_error_free_mem;
+		return -ENOENT;
 	}
 
-	crc->regs = ioremap(res->start, resource_size(res));
-	if (!crc->regs) {
+	crc->regs = devm_ioremap_resource(dev, res);
+	if (IS_ERR((void *)crc->regs)) {
 		dev_err(&pdev->dev, "Cannot map CRC IO\n");
-		ret = -ENXIO;
-		goto out_error_free_mem;
+		return PTR_ERR((void *)crc->regs);
 	}
 
 	crc->irq = platform_get_irq(pdev, 0);
 	if (crc->irq < 0) {
 		dev_err(&pdev->dev, "No CRC DCNTEXP IRQ specified\n");
-		ret = -ENOENT;
-		goto out_error_unmap;
+		return -ENOENT;
 	}
 
-	ret = request_irq(crc->irq, bfin_crypto_crc_handler, IRQF_SHARED, dev_name(dev), crc);
+	ret = devm_request_irq(dev, crc->irq, bfin_crypto_crc_handler,
+			IRQF_SHARED, dev_name(dev), crc);
 	if (ret) {
 		dev_err(&pdev->dev, "Unable to request blackfin crc irq\n");
-		goto out_error_unmap;
+		return ret;
 	}
 
 	res = platform_get_resource(pdev, IORESOURCE_DMA, 0);
 	if (res == NULL) {
 		dev_err(&pdev->dev, "No CRC DMA channel specified\n");
-		ret = -ENOENT;
-		goto out_error_irq;
+		return -ENOENT;
 	}
 	crc->dma_ch = res->start;
 
 	ret = request_dma(crc->dma_ch, dev_name(dev));
 	if (ret) {
 		dev_err(&pdev->dev, "Unable to attach Blackfin CRC DMA channel\n");
-		goto out_error_irq;
+		return ret;
 	}
 
 	crc->sg_cpu = dma_alloc_coherent(&pdev->dev, PAGE_SIZE, &crc->sg_dma, GFP_KERNEL);
@@ -660,9 +649,7 @@
 	crc->sg_mid_buf = (u8 *)(crc->sg_cpu + ((CRC_MAX_DMA_DESC + 1) << 1));
 
 	crc->regs->control = 0;
-	SSYNC();
 	crc->regs->poly = crc->poly = (u32)pdev->dev.platform_data;
-	SSYNC();
 
 	while (!(crc->regs->status & LUTDONE) && (--timeout) > 0)
 		cpu_relax();
@@ -693,12 +680,6 @@
 	if (crc->sg_cpu)
 		dma_free_coherent(&pdev->dev, PAGE_SIZE, crc->sg_cpu, crc->sg_dma);
 	free_dma(crc->dma_ch);
-out_error_irq:
-	free_irq(crc->irq, crc);
-out_error_unmap:
-	iounmap((void *)crc->regs);
-out_error_free_mem:
-	kfree(crc);
 
 	return ret;
 }
@@ -721,10 +702,6 @@
 	crypto_unregister_ahash(&algs);
 	tasklet_kill(&crc->done_task);
 	free_dma(crc->dma_ch);
-	if (crc->irq > 0)
-		free_irq(crc->irq, crc);
-	iounmap((void *)crc->regs);
-	kfree(crc);
 
 	return 0;
 }
diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index b71f2fd..5f89125 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -66,10 +66,14 @@
 
 /* length of descriptors text */
 #define DESC_AEAD_BASE			(4 * CAAM_CMD_SZ)
-#define DESC_AEAD_ENC_LEN		(DESC_AEAD_BASE + 16 * CAAM_CMD_SZ)
-#define DESC_AEAD_DEC_LEN		(DESC_AEAD_BASE + 21 * CAAM_CMD_SZ)
+#define DESC_AEAD_ENC_LEN		(DESC_AEAD_BASE + 15 * CAAM_CMD_SZ)
+#define DESC_AEAD_DEC_LEN		(DESC_AEAD_BASE + 18 * CAAM_CMD_SZ)
 #define DESC_AEAD_GIVENC_LEN		(DESC_AEAD_ENC_LEN + 7 * CAAM_CMD_SZ)
 
+#define DESC_AEAD_NULL_BASE		(3 * CAAM_CMD_SZ)
+#define DESC_AEAD_NULL_ENC_LEN		(DESC_AEAD_NULL_BASE + 14 * CAAM_CMD_SZ)
+#define DESC_AEAD_NULL_DEC_LEN		(DESC_AEAD_NULL_BASE + 17 * CAAM_CMD_SZ)
+
 #define DESC_ABLKCIPHER_BASE		(3 * CAAM_CMD_SZ)
 #define DESC_ABLKCIPHER_ENC_LEN		(DESC_ABLKCIPHER_BASE + \
 					 20 * CAAM_CMD_SZ)
@@ -104,27 +108,14 @@
 }
 
 /*
- * Wait for completion of class 1 key loading before allowing
- * error propagation
- */
-static inline void append_dec_shr_done(u32 *desc)
-{
-	u32 *jump_cmd;
-
-	jump_cmd = append_jump(desc, JUMP_CLASS_CLASS1 | JUMP_TEST_ALL);
-	set_jump_tgt_here(desc, jump_cmd);
-	append_cmd(desc, SET_OK_NO_PROP_ERRORS | CMD_LOAD);
-}
-
-/*
  * For aead functions, read payload and write payload,
  * both of which are specified in req->src and req->dst
  */
 static inline void aead_append_src_dst(u32 *desc, u32 msg_type)
 {
+	append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF);
 	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_BOTH |
 			     KEY_VLF | msg_type | FIFOLD_TYPE_LASTBOTH);
-	append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF);
 }
 
 /*
@@ -211,9 +202,196 @@
 	append_key_aead(desc, ctx, keys_fit_inline);
 
 	set_jump_tgt_here(desc, key_jump_cmd);
+}
 
-	/* Propagate errors from shared to job descriptor */
-	append_cmd(desc, SET_OK_NO_PROP_ERRORS | CMD_LOAD);
+static int aead_null_set_sh_desc(struct crypto_aead *aead)
+{
+	struct aead_tfm *tfm = &aead->base.crt_aead;
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct device *jrdev = ctx->jrdev;
+	bool keys_fit_inline = false;
+	u32 *key_jump_cmd, *jump_cmd, *read_move_cmd, *write_move_cmd;
+	u32 *desc;
+
+	/*
+	 * Job Descriptor and Shared Descriptors
+	 * must all fit into the 64-word Descriptor h/w Buffer
+	 */
+	if (DESC_AEAD_NULL_ENC_LEN + DESC_JOB_IO_LEN +
+	    ctx->split_key_pad_len <= CAAM_DESC_BYTES_MAX)
+		keys_fit_inline = true;
+
+	/* aead_encrypt shared descriptor */
+	desc = ctx->sh_desc_enc;
+
+	init_sh_desc(desc, HDR_SHARE_SERIAL);
+
+	/* Skip if already shared */
+	key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
+				   JUMP_COND_SHRD);
+	if (keys_fit_inline)
+		append_key_as_imm(desc, ctx->key, ctx->split_key_pad_len,
+				  ctx->split_key_len, CLASS_2 |
+				  KEY_DEST_MDHA_SPLIT | KEY_ENC);
+	else
+		append_key(desc, ctx->key_dma, ctx->split_key_len, CLASS_2 |
+			   KEY_DEST_MDHA_SPLIT | KEY_ENC);
+	set_jump_tgt_here(desc, key_jump_cmd);
+
+	/* cryptlen = seqoutlen - authsize */
+	append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize);
+
+	/*
+	 * NULL encryption; IV is zero
+	 * assoclen = (assoclen + cryptlen) - cryptlen
+	 */
+	append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG3, CAAM_CMD_SZ);
+
+	/* read assoc before reading payload */
+	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG |
+			     KEY_VLF);
+
+	/* Prepare to read and write cryptlen bytes */
+	append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
+	append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
+
+	/*
+	 * MOVE_LEN opcode is not available in all SEC HW revisions,
+	 * thus need to do some magic, i.e. self-patch the descriptor
+	 * buffer.
+	 */
+	read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF |
+				    MOVE_DEST_MATH3 |
+				    (0x6 << MOVE_LEN_SHIFT));
+	write_move_cmd = append_move(desc, MOVE_SRC_MATH3 |
+				     MOVE_DEST_DESCBUF |
+				     MOVE_WAITCOMP |
+				     (0x8 << MOVE_LEN_SHIFT));
+
+	/* Class 2 operation */
+	append_operation(desc, ctx->class2_alg_type |
+			 OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT);
+
+	/* Read and write cryptlen bytes */
+	aead_append_src_dst(desc, FIFOLD_TYPE_MSG | FIFOLD_TYPE_FLUSH1);
+
+	set_move_tgt_here(desc, read_move_cmd);
+	set_move_tgt_here(desc, write_move_cmd);
+	append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO);
+	append_move(desc, MOVE_SRC_INFIFO_CL | MOVE_DEST_OUTFIFO |
+		    MOVE_AUX_LS);
+
+	/* Write ICV */
+	append_seq_store(desc, ctx->authsize, LDST_CLASS_2_CCB |
+			 LDST_SRCDST_BYTE_CONTEXT);
+
+	ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc,
+					      desc_bytes(desc),
+					      DMA_TO_DEVICE);
+	if (dma_mapping_error(jrdev, ctx->sh_desc_enc_dma)) {
+		dev_err(jrdev, "unable to map shared descriptor\n");
+		return -ENOMEM;
+	}
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR,
+		       "aead null enc shdesc@"__stringify(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, desc,
+		       desc_bytes(desc), 1);
+#endif
+
+	/*
+	 * Job Descriptor and Shared Descriptors
+	 * must all fit into the 64-word Descriptor h/w Buffer
+	 */
+	if (DESC_AEAD_NULL_DEC_LEN + DESC_JOB_IO_LEN +
+	    ctx->split_key_pad_len <= CAAM_DESC_BYTES_MAX)
+		keys_fit_inline = true;
+
+	desc = ctx->sh_desc_dec;
+
+	/* aead_decrypt shared descriptor */
+	init_sh_desc(desc, HDR_SHARE_SERIAL);
+
+	/* Skip if already shared */
+	key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
+				   JUMP_COND_SHRD);
+	if (keys_fit_inline)
+		append_key_as_imm(desc, ctx->key, ctx->split_key_pad_len,
+				  ctx->split_key_len, CLASS_2 |
+				  KEY_DEST_MDHA_SPLIT | KEY_ENC);
+	else
+		append_key(desc, ctx->key_dma, ctx->split_key_len, CLASS_2 |
+			   KEY_DEST_MDHA_SPLIT | KEY_ENC);
+	set_jump_tgt_here(desc, key_jump_cmd);
+
+	/* Class 2 operation */
+	append_operation(desc, ctx->class2_alg_type |
+			 OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT | OP_ALG_ICV_ON);
+
+	/* assoclen + cryptlen = seqinlen - ivsize - authsize */
+	append_math_sub_imm_u32(desc, REG3, SEQINLEN, IMM,
+				ctx->authsize + tfm->ivsize);
+	/* assoclen = (assoclen + cryptlen) - cryptlen */
+	append_math_sub(desc, REG2, SEQOUTLEN, REG0, CAAM_CMD_SZ);
+	append_math_sub(desc, VARSEQINLEN, REG3, REG2, CAAM_CMD_SZ);
+
+	/* read assoc before reading payload */
+	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG |
+			     KEY_VLF);
+
+	/* Prepare to read and write cryptlen bytes */
+	append_math_add(desc, VARSEQINLEN, ZERO, REG2, CAAM_CMD_SZ);
+	append_math_add(desc, VARSEQOUTLEN, ZERO, REG2, CAAM_CMD_SZ);
+
+	/*
+	 * MOVE_LEN opcode is not available in all SEC HW revisions,
+	 * thus need to do some magic, i.e. self-patch the descriptor
+	 * buffer.
+	 */
+	read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF |
+				    MOVE_DEST_MATH2 |
+				    (0x6 << MOVE_LEN_SHIFT));
+	write_move_cmd = append_move(desc, MOVE_SRC_MATH2 |
+				     MOVE_DEST_DESCBUF |
+				     MOVE_WAITCOMP |
+				     (0x8 << MOVE_LEN_SHIFT));
+
+	/* Read and write cryptlen bytes */
+	aead_append_src_dst(desc, FIFOLD_TYPE_MSG | FIFOLD_TYPE_FLUSH1);
+
+	/*
+	 * Insert a NOP here, since we need at least 4 instructions between
+	 * code patching the descriptor buffer and the location being patched.
+	 */
+	jump_cmd = append_jump(desc, JUMP_TEST_ALL);
+	set_jump_tgt_here(desc, jump_cmd);
+
+	set_move_tgt_here(desc, read_move_cmd);
+	set_move_tgt_here(desc, write_move_cmd);
+	append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO);
+	append_move(desc, MOVE_SRC_INFIFO_CL | MOVE_DEST_OUTFIFO |
+		    MOVE_AUX_LS);
+	append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO);
+
+	/* Load ICV */
+	append_seq_fifo_load(desc, ctx->authsize, FIFOLD_CLASS_CLASS2 |
+			     FIFOLD_TYPE_LAST2 | FIFOLD_TYPE_ICV);
+
+	ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc,
+					      desc_bytes(desc),
+					      DMA_TO_DEVICE);
+	if (dma_mapping_error(jrdev, ctx->sh_desc_dec_dma)) {
+		dev_err(jrdev, "unable to map shared descriptor\n");
+		return -ENOMEM;
+	}
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR,
+		       "aead null dec shdesc@"__stringify(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, desc,
+		       desc_bytes(desc), 1);
+#endif
+
+	return 0;
 }
 
 static int aead_set_sh_desc(struct crypto_aead *aead)
@@ -222,13 +400,16 @@
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct device *jrdev = ctx->jrdev;
 	bool keys_fit_inline = false;
-	u32 *key_jump_cmd, *jump_cmd;
 	u32 geniv, moveiv;
 	u32 *desc;
 
-	if (!ctx->enckeylen || !ctx->authsize)
+	if (!ctx->authsize)
 		return 0;
 
+	/* NULL encryption / decryption */
+	if (!ctx->enckeylen)
+		return aead_null_set_sh_desc(aead);
+
 	/*
 	 * Job Descriptor and Shared Descriptors
 	 * must all fit into the 64-word Descriptor h/w Buffer
@@ -253,7 +434,7 @@
 	/* assoclen + cryptlen = seqinlen - ivsize */
 	append_math_sub_imm_u32(desc, REG2, SEQINLEN, IMM, tfm->ivsize);
 
-	/* assoclen + cryptlen = (assoclen + cryptlen) - cryptlen */
+	/* assoclen = (assoclen + cryptlen) - cryptlen */
 	append_math_sub(desc, VARSEQINLEN, REG2, REG3, CAAM_CMD_SZ);
 
 	/* read assoc before reading payload */
@@ -296,30 +477,18 @@
 	    CAAM_DESC_BYTES_MAX)
 		keys_fit_inline = true;
 
+	/* aead_decrypt shared descriptor */
 	desc = ctx->sh_desc_dec;
 
-	/* aead_decrypt shared descriptor */
-	init_sh_desc(desc, HDR_SHARE_SERIAL);
-
-	/* Skip if already shared */
-	key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
-				   JUMP_COND_SHRD);
-
-	append_key_aead(desc, ctx, keys_fit_inline);
-
-	/* Only propagate error immediately if shared */
-	jump_cmd = append_jump(desc, JUMP_TEST_ALL);
-	set_jump_tgt_here(desc, key_jump_cmd);
-	append_cmd(desc, SET_OK_NO_PROP_ERRORS | CMD_LOAD);
-	set_jump_tgt_here(desc, jump_cmd);
+	init_sh_desc_key_aead(desc, ctx, keys_fit_inline);
 
 	/* Class 2 operation */
 	append_operation(desc, ctx->class2_alg_type |
 			 OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT | OP_ALG_ICV_ON);
 
-	/* assoclen + cryptlen = seqinlen - ivsize */
+	/* assoclen + cryptlen = seqinlen - ivsize - authsize */
 	append_math_sub_imm_u32(desc, REG3, SEQINLEN, IMM,
-				ctx->authsize + tfm->ivsize)
+				ctx->authsize + tfm->ivsize);
 	/* assoclen = (assoclen + cryptlen) - cryptlen */
 	append_math_sub(desc, REG2, SEQOUTLEN, REG0, CAAM_CMD_SZ);
 	append_math_sub(desc, VARSEQINLEN, REG3, REG2, CAAM_CMD_SZ);
@@ -340,7 +509,6 @@
 	/* Load ICV */
 	append_seq_fifo_load(desc, ctx->authsize, FIFOLD_CLASS_CLASS2 |
 			     FIFOLD_TYPE_LAST2 | FIFOLD_TYPE_ICV);
-	append_dec_shr_done(desc);
 
 	ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc,
 					      desc_bytes(desc),
@@ -532,7 +700,7 @@
 	struct ablkcipher_tfm *tfm = &ablkcipher->base.crt_ablkcipher;
 	struct device *jrdev = ctx->jrdev;
 	int ret = 0;
-	u32 *key_jump_cmd, *jump_cmd;
+	u32 *key_jump_cmd;
 	u32 *desc;
 
 #ifdef DEBUG
@@ -563,9 +731,6 @@
 
 	set_jump_tgt_here(desc, key_jump_cmd);
 
-	/* Propagate errors from shared to job descriptor */
-	append_cmd(desc, SET_OK_NO_PROP_ERRORS | CMD_LOAD);
-
 	/* Load iv */
 	append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_BYTE_CONTEXT |
 		   LDST_CLASS_1_CCB | tfm->ivsize);
@@ -603,11 +768,7 @@
 			  ctx->enckeylen, CLASS_1 |
 			  KEY_DEST_CLASS_REG);
 
-	/* For aead, only propagate error immediately if shared */
-	jump_cmd = append_jump(desc, JUMP_TEST_ALL);
 	set_jump_tgt_here(desc, key_jump_cmd);
-	append_cmd(desc, SET_OK_NO_PROP_ERRORS | CMD_LOAD);
-	set_jump_tgt_here(desc, jump_cmd);
 
 	/* load IV */
 	append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_BYTE_CONTEXT |
@@ -619,9 +780,6 @@
 	/* Perform operation */
 	ablkcipher_append_src_dst(desc);
 
-	/* Wait for key to load before allowing propagating error */
-	append_dec_shr_done(desc);
-
 	ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc,
 					      desc_bytes(desc),
 					      DMA_TO_DEVICE);
@@ -1459,6 +1617,11 @@
 	return ret;
 }
 
+static int aead_null_givencrypt(struct aead_givcrypt_request *areq)
+{
+	return aead_encrypt(&areq->areq);
+}
+
 /*
  * allocate and map the ablkcipher extended descriptor for ablkcipher
  */
@@ -1648,6 +1811,124 @@
 static struct caam_alg_template driver_algs[] = {
 	/* single-pass ipsec_esp descriptor */
 	{
+		.name = "authenc(hmac(md5),ecb(cipher_null))",
+		.driver_name = "authenc-hmac-md5-ecb-cipher_null-caam",
+		.blocksize = NULL_BLOCK_SIZE,
+		.type = CRYPTO_ALG_TYPE_AEAD,
+		.template_aead = {
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.givencrypt = aead_null_givencrypt,
+			.geniv = "<built-in>",
+			.ivsize = NULL_IV_SIZE,
+			.maxauthsize = MD5_DIGEST_SIZE,
+			},
+		.class1_alg_type = 0,
+		.class2_alg_type = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC_PRECOMP,
+		.alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC,
+	},
+	{
+		.name = "authenc(hmac(sha1),ecb(cipher_null))",
+		.driver_name = "authenc-hmac-sha1-ecb-cipher_null-caam",
+		.blocksize = NULL_BLOCK_SIZE,
+		.type = CRYPTO_ALG_TYPE_AEAD,
+		.template_aead = {
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.givencrypt = aead_null_givencrypt,
+			.geniv = "<built-in>",
+			.ivsize = NULL_IV_SIZE,
+			.maxauthsize = SHA1_DIGEST_SIZE,
+			},
+		.class1_alg_type = 0,
+		.class2_alg_type = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC_PRECOMP,
+		.alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC,
+	},
+	{
+		.name = "authenc(hmac(sha224),ecb(cipher_null))",
+		.driver_name = "authenc-hmac-sha224-ecb-cipher_null-caam",
+		.blocksize = NULL_BLOCK_SIZE,
+		.type = CRYPTO_ALG_TYPE_AEAD,
+		.template_aead = {
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.givencrypt = aead_null_givencrypt,
+			.geniv = "<built-in>",
+			.ivsize = NULL_IV_SIZE,
+			.maxauthsize = SHA224_DIGEST_SIZE,
+			},
+		.class1_alg_type = 0,
+		.class2_alg_type = OP_ALG_ALGSEL_SHA224 |
+				   OP_ALG_AAI_HMAC_PRECOMP,
+		.alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC,
+	},
+	{
+		.name = "authenc(hmac(sha256),ecb(cipher_null))",
+		.driver_name = "authenc-hmac-sha256-ecb-cipher_null-caam",
+		.blocksize = NULL_BLOCK_SIZE,
+		.type = CRYPTO_ALG_TYPE_AEAD,
+		.template_aead = {
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.givencrypt = aead_null_givencrypt,
+			.geniv = "<built-in>",
+			.ivsize = NULL_IV_SIZE,
+			.maxauthsize = SHA256_DIGEST_SIZE,
+			},
+		.class1_alg_type = 0,
+		.class2_alg_type = OP_ALG_ALGSEL_SHA256 |
+				   OP_ALG_AAI_HMAC_PRECOMP,
+		.alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC,
+	},
+	{
+		.name = "authenc(hmac(sha384),ecb(cipher_null))",
+		.driver_name = "authenc-hmac-sha384-ecb-cipher_null-caam",
+		.blocksize = NULL_BLOCK_SIZE,
+		.type = CRYPTO_ALG_TYPE_AEAD,
+		.template_aead = {
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.givencrypt = aead_null_givencrypt,
+			.geniv = "<built-in>",
+			.ivsize = NULL_IV_SIZE,
+			.maxauthsize = SHA384_DIGEST_SIZE,
+			},
+		.class1_alg_type = 0,
+		.class2_alg_type = OP_ALG_ALGSEL_SHA384 |
+				   OP_ALG_AAI_HMAC_PRECOMP,
+		.alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC,
+	},
+	{
+		.name = "authenc(hmac(sha512),ecb(cipher_null))",
+		.driver_name = "authenc-hmac-sha512-ecb-cipher_null-caam",
+		.blocksize = NULL_BLOCK_SIZE,
+		.type = CRYPTO_ALG_TYPE_AEAD,
+		.template_aead = {
+			.setkey = aead_setkey,
+			.setauthsize = aead_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.givencrypt = aead_null_givencrypt,
+			.geniv = "<built-in>",
+			.ivsize = NULL_IV_SIZE,
+			.maxauthsize = SHA512_DIGEST_SIZE,
+			},
+		.class1_alg_type = 0,
+		.class2_alg_type = OP_ALG_ALGSEL_SHA512 |
+				   OP_ALG_AAI_HMAC_PRECOMP,
+		.alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC,
+	},
+	{
 		.name = "authenc(hmac(md5),cbc(aes))",
 		.driver_name = "authenc-hmac-md5-cbc-aes-caam",
 		.blocksize = AES_BLOCK_SIZE,
@@ -2099,6 +2380,11 @@
 		dma_unmap_single(ctx->jrdev, ctx->sh_desc_givenc_dma,
 				 desc_bytes(ctx->sh_desc_givenc),
 				 DMA_TO_DEVICE);
+	if (ctx->key_dma &&
+	    !dma_mapping_error(ctx->jrdev, ctx->key_dma))
+		dma_unmap_single(ctx->jrdev, ctx->key_dma,
+				 ctx->enckeylen + ctx->split_key_pad_len,
+				 DMA_TO_DEVICE);
 
 	caam_jr_free(ctx->jrdev);
 }
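
The six new authenc(hmac(*),ecb(cipher_null)) templates give ESP with NULL encryption a fully offloaded path: only the class 2 (hash) engine runs, and the payload is passed through to the output via the self-patching MOVE commands constructed above. From the API side these behave like any other AEAD; a hedged usage sketch against the 3.15-era interface (names and error handling reduced to the essentials):

    #include <linux/crypto.h>
    #include <linux/err.h>
    #include <crypto/sha.h>

    static struct crypto_aead *get_null_esp_aead(void)
    {
            struct crypto_aead *tfm;

            tfm = crypto_alloc_aead("authenc(hmac(sha1),ecb(cipher_null))",
                                    0, 0);
            if (IS_ERR(tfm))
                    return tfm;

            /* full HMAC-SHA1 ICV; the cipher half has no key and no IV */
            if (crypto_aead_setauthsize(tfm, SHA1_DIGEST_SIZE)) {
                    crypto_free_aead(tfm);
                    return ERR_PTR(-EINVAL);
            }
            return tfm;
    }

Keying still goes through aead_setkey(), which parses the usual authenc() rtattr-encoded blob; with cipher_null the encryption key length in that blob is simply zero.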
diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c
index 28486b1..3529b54 100644
--- a/drivers/crypto/caam/caamrng.c
+++ b/drivers/crypto/caam/caamrng.c
@@ -76,7 +76,7 @@
 	struct buf_data bufs[2];
 };
 
-static struct caam_rng_ctx rng_ctx;
+static struct caam_rng_ctx *rng_ctx;
 
 static inline void rng_unmap_buf(struct device *jrdev, struct buf_data *bd)
 {
@@ -137,7 +137,7 @@
 
 static int caam_read(struct hwrng *rng, void *data, size_t max, bool wait)
 {
-	struct caam_rng_ctx *ctx = &rng_ctx;
+	struct caam_rng_ctx *ctx = rng_ctx;
 	struct buf_data *bd = &ctx->bufs[ctx->current_buf];
 	int next_buf_idx, copied_idx;
 	int err;
@@ -237,12 +237,12 @@
 	struct buf_data *bd;
 
 	for (i = 0; i < 2; i++) {
-		bd = &rng_ctx.bufs[i];
+		bd = &rng_ctx->bufs[i];
 		if (atomic_read(&bd->empty) == BUF_PENDING)
 			wait_for_completion(&bd->filled);
 	}
 
-	rng_unmap_ctx(&rng_ctx);
+	rng_unmap_ctx(rng_ctx);
 }
 
 static void caam_init_buf(struct caam_rng_ctx *ctx, int buf_id)
@@ -273,8 +273,9 @@
 
 static void __exit caam_rng_exit(void)
 {
-	caam_jr_free(rng_ctx.jrdev);
+	caam_jr_free(rng_ctx->jrdev);
 	hwrng_unregister(&caam_rng);
+	kfree(rng_ctx);
 }
 
 static int __init caam_rng_init(void)
@@ -286,8 +287,10 @@
 		pr_err("Job Ring Device allocation for transform failed\n");
 		return PTR_ERR(dev);
 	}
-
-	caam_init_rng(&rng_ctx, dev);
+	rng_ctx = kmalloc(sizeof(struct caam_rng_ctx), GFP_DMA);
+	if (!rng_ctx)
+		return -ENOMEM;
+	caam_init_rng(rng_ctx, dev);
 
 	dev_info(dev, "registering rng-caam\n");
 	return hwrng_register(&caam_rng);
diff --git a/drivers/crypto/caam/compat.h b/drivers/crypto/caam/compat.h
index 762aeff..f227922 100644
--- a/drivers/crypto/caam/compat.h
+++ b/drivers/crypto/caam/compat.h
@@ -26,6 +26,7 @@
 #include <net/xfrm.h>
 
 #include <crypto/algapi.h>
+#include <crypto/null.h>
 #include <crypto/aes.h>
 #include <crypto/des.h>
 #include <crypto/sha.h>
diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index 63fb1af..1c38f86 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -14,7 +14,6 @@
 #include "jr.h"
 #include "desc_constr.h"
 #include "error.h"
-#include "ctrl.h"
 
 /*
  * Descriptor to instantiate RNG State Handle 0 in normal mode and
@@ -352,32 +351,17 @@
 
 /**
  * caam_get_era() - Return the ERA of the SEC on SoC, based
- * on the SEC_VID register.
- * Returns the ERA number (1..4) or -ENOTSUPP if the ERA is unknown.
- * @caam_id - the value of the SEC_VID register
+ * on "sec-era" propery in the DTS. This property is updated by u-boot.
  **/
-int caam_get_era(u64 caam_id)
+int caam_get_era(void)
 {
-	struct sec_vid *sec_vid = (struct sec_vid *)&caam_id;
-	static const struct {
-		u16 ip_id;
-		u8 maj_rev;
-		u8 era;
-	} caam_eras[] = {
-		{0x0A10, 1, 1},
-		{0x0A10, 2, 2},
-		{0x0A12, 1, 3},
-		{0x0A14, 1, 3},
-		{0x0A14, 2, 4},
-		{0x0A16, 1, 4},
-		{0x0A11, 1, 4}
-	};
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(caam_eras); i++)
-		if (caam_eras[i].ip_id == sec_vid->ip_id &&
-			caam_eras[i].maj_rev == sec_vid->maj_rev)
-				return caam_eras[i].era;
+	struct device_node *caam_node;
+	for_each_compatible_node(caam_node, NULL, "fsl,sec-v4.0") {
+		const uint32_t *prop = (uint32_t *)of_get_property(caam_node,
+				"fsl,sec-era",
+				NULL);
+		return prop ? *prop : -ENOTSUPP;
+	}
 
 	return -ENOTSUPP;
 }
@@ -443,13 +427,10 @@
 	 * for all, then go probe each one.
 	 */
 	rspec = 0;
-	for_each_compatible_node(np, NULL, "fsl,sec-v4.0-job-ring")
-		rspec++;
-	if (!rspec) {
-		/* for backward compatible with device trees */
-		for_each_compatible_node(np, NULL, "fsl,sec4.0-job-ring")
+	for_each_available_child_of_node(nprop, np)
+		if (of_device_is_compatible(np, "fsl,sec-v4.0-job-ring") ||
+		    of_device_is_compatible(np, "fsl,sec4.0-job-ring"))
 			rspec++;
-	}
 
 	ctrlpriv->jrpdev = kzalloc(sizeof(struct platform_device *) * rspec,
 								GFP_KERNEL);
@@ -460,18 +441,9 @@
 
 	ring = 0;
 	ctrlpriv->total_jobrs = 0;
-	for_each_compatible_node(np, NULL, "fsl,sec-v4.0-job-ring") {
-		ctrlpriv->jrpdev[ring] =
-				of_platform_device_create(np, NULL, dev);
-		if (!ctrlpriv->jrpdev[ring]) {
-			pr_warn("JR%d Platform device creation error\n", ring);
-			continue;
-		}
-		ctrlpriv->total_jobrs++;
-		ring++;
-	}
-	if (!ring) {
-		for_each_compatible_node(np, NULL, "fsl,sec4.0-job-ring") {
+	for_each_available_child_of_node(nprop, np)
+		if (of_device_is_compatible(np, "fsl,sec-v4.0-job-ring") ||
+		    of_device_is_compatible(np, "fsl,sec4.0-job-ring")) {
 			ctrlpriv->jrpdev[ring] =
 				of_platform_device_create(np, NULL, dev);
 			if (!ctrlpriv->jrpdev[ring]) {
@@ -482,7 +454,6 @@
 			ctrlpriv->total_jobrs++;
 			ring++;
 		}
-	}
 
 	/* Check to see if QI present. If so, enable */
 	ctrlpriv->qi_present = !!(rd_reg64(&topregs->ctrl.perfmon.comp_parms) &
@@ -564,7 +535,7 @@
 
 	/* Report "alive" for developer to see */
 	dev_info(dev, "device ID = 0x%016llx (Era %d)\n", caam_id,
-		 caam_get_era(caam_id));
+		 caam_get_era());
 	dev_info(dev, "job rings = %d, qi = %d\n",
 		 ctrlpriv->total_jobrs, ctrlpriv->qi_present);
 
diff --git a/drivers/crypto/caam/ctrl.h b/drivers/crypto/caam/ctrl.h
index 980d44e..cac5402 100644
--- a/drivers/crypto/caam/ctrl.h
+++ b/drivers/crypto/caam/ctrl.h
@@ -8,6 +8,6 @@
 #define CTRL_H
 
 /* Prototypes for backend-level services exposed to APIs */
-int caam_get_era(u64 caam_id);
+int caam_get_era(void);
 
 #endif /* CTRL_H */
diff --git a/drivers/crypto/caam/desc_constr.h b/drivers/crypto/caam/desc_constr.h
index cd5f678..7eec20b 100644
--- a/drivers/crypto/caam/desc_constr.h
+++ b/drivers/crypto/caam/desc_constr.h
@@ -155,21 +155,29 @@
 	append_data(desc, data, len);
 }
 
-static inline u32 *append_jump(u32 *desc, u32 options)
-{
-	u32 *cmd = desc_end(desc);
-
-	PRINT_POS;
-	append_cmd(desc, CMD_JUMP | options);
-
-	return cmd;
+#define APPEND_CMD_RET(cmd, op) \
+static inline u32 *append_##cmd(u32 *desc, u32 options) \
+{ \
+	u32 *cmd = desc_end(desc); \
+	PRINT_POS; \
+	append_cmd(desc, CMD_##op | options); \
+	return cmd; \
 }
+APPEND_CMD_RET(jump, JUMP)
+APPEND_CMD_RET(move, MOVE)
 
 static inline void set_jump_tgt_here(u32 *desc, u32 *jump_cmd)
 {
 	*jump_cmd = *jump_cmd | (desc_len(desc) - (jump_cmd - desc));
 }
 
+static inline void set_move_tgt_here(u32 *desc, u32 *move_cmd)
+{
+	*move_cmd &= ~MOVE_OFFSET_MASK;
+	*move_cmd = *move_cmd | ((desc_len(desc) << (MOVE_OFFSET_SHIFT + 2)) &
+				 MOVE_OFFSET_MASK);
+}
+
 #define APPEND_CMD(cmd, op) \
 static inline void append_##cmd(u32 *desc, u32 options) \
 { \
@@ -177,7 +185,6 @@
 	append_cmd(desc, CMD_##op | options); \
 }
 APPEND_CMD(operation, OPERATION)
-APPEND_CMD(move, MOVE)
 
 #define APPEND_CMD_LEN(cmd, op) \
 static inline void append_##cmd(u32 *desc, unsigned int len, u32 options) \
@@ -328,7 +335,7 @@
 do { \
 	APPEND_MATH(op, desc, dest, src_0, src_1, CAAM_CMD_SZ); \
 	append_cmd(desc, data); \
-} while (0);
+} while (0)
 
 #define append_math_add_imm_u32(desc, dest, src0, src1, data) \
 	APPEND_MATH_IMM_u32(ADD, desc, dest, src0, src1, data)
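
Two details in this header are worth spelling out. APPEND_CMD_RET() turns the old open-coded append_jump() into a generator so that append_move() gains the same return-a-pointer behaviour, which set_move_tgt_here() needs for the descriptor self-patching used earlier in caamalg.c; expanded by hand, APPEND_CMD_RET(move, MOVE) is exactly:

    static inline u32 *append_move(u32 *desc, u32 options)
    {
            u32 *cmd = desc_end(desc);

            PRINT_POS;
            append_cmd(desc, CMD_MOVE | options);
            return cmd;
    }

And dropping the stray semicolon after do { ... } while (0) in APPEND_MATH_IMM_u32 is standard macro hygiene: with the semicolon baked in, writing the invocation with its own terminating semicolon produces an empty extra statement that breaks if/else chains.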
diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h
index d50174f..cbde8b9 100644
--- a/drivers/crypto/caam/regs.h
+++ b/drivers/crypto/caam/regs.h
@@ -74,10 +74,10 @@
 #endif
 #else
 #ifdef __LITTLE_ENDIAN
-#define wr_reg32(reg, data) __raw_writel(reg, data)
+#define wr_reg32(reg, data) __raw_writel(data, reg)
 #define rd_reg32(reg) __raw_readl(reg)
 #ifdef CONFIG_64BIT
-#define wr_reg64(reg, data) __raw_writeq(reg, data)
+#define wr_reg64(reg, data) __raw_writeq(data, reg)
 #define rd_reg64(reg) __raw_readq(reg)
 #endif
 #endif
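
The regs.h hunk is a plain argument-order fix: the raw MMIO accessors take the value first and the target register second, so the little-endian variants of wr_reg32/wr_reg64 had their operands swapped, treating the register address as the value and the data as the address. The shape being matched (the accessors are arch-specific, often macros, but the ordering is universal):

    void __raw_writel(u32 value, volatile void __iomem *addr);
    void __raw_writeq(u64 value, volatile void __iomem *addr);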
diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c
index 2636f04..20dc848 100644
--- a/drivers/crypto/ccp/ccp-crypto-main.c
+++ b/drivers/crypto/ccp/ccp-crypto-main.c
@@ -11,6 +11,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/moduleparam.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/ccp.h>
@@ -24,28 +25,33 @@
 MODULE_VERSION("1.0.0");
 MODULE_DESCRIPTION("AMD Cryptographic Coprocessor crypto API support");
 
+static unsigned int aes_disable;
+module_param(aes_disable, uint, 0444);
+MODULE_PARM_DESC(aes_disable, "Disable use of AES - any non-zero value");
+
+static unsigned int sha_disable;
+module_param(sha_disable, uint, 0444);
+MODULE_PARM_DESC(sha_disable, "Disable use of SHA - any non-zero value");
+
 
 /* List heads for the supported algorithms */
 static LIST_HEAD(hash_algs);
 static LIST_HEAD(cipher_algs);
 
-/* For any tfm, requests for that tfm on the same CPU must be returned
- * in the order received.  With multiple queues available, the CCP can
- * process more than one cmd at a time.  Therefore we must maintain
- * a cmd list to insure the proper ordering of requests on a given tfm/cpu
- * combination.
+/* For any tfm, requests for that tfm must be returned in the order
+ * received.  With multiple queues available, the CCP can process more
+ * than one cmd at a time.  Therefore we must maintain a cmd list to
+ * ensure the proper ordering of requests on a given tfm.
  */
-struct ccp_crypto_cpu_queue {
+struct ccp_crypto_queue {
 	struct list_head cmds;
 	struct list_head *backlog;
 	unsigned int cmd_count;
 };
-#define CCP_CRYPTO_MAX_QLEN	50
+#define CCP_CRYPTO_MAX_QLEN	100
 
-struct ccp_crypto_percpu_queue {
-	struct ccp_crypto_cpu_queue __percpu *cpu_queue;
-};
-static struct ccp_crypto_percpu_queue req_queue;
+static struct ccp_crypto_queue req_queue;
+static spinlock_t req_queue_lock;
 
 struct ccp_crypto_cmd {
 	struct list_head entry;
@@ -62,8 +68,6 @@
 
 	/* Used for held command processing to determine state */
 	int ret;
-
-	int cpu;
 };
 
 struct ccp_crypto_cpu {
@@ -82,25 +86,21 @@
 	return true;
 }
 
-/*
- * ccp_crypto_cmd_complete must be called while running on the appropriate
- * cpu and the caller must have done a get_cpu to disable preemption
- */
 static struct ccp_crypto_cmd *ccp_crypto_cmd_complete(
 	struct ccp_crypto_cmd *crypto_cmd, struct ccp_crypto_cmd **backlog)
 {
-	struct ccp_crypto_cpu_queue *cpu_queue;
 	struct ccp_crypto_cmd *held = NULL, *tmp;
+	unsigned long flags;
 
 	*backlog = NULL;
 
-	cpu_queue = this_cpu_ptr(req_queue.cpu_queue);
+	spin_lock_irqsave(&req_queue_lock, flags);
 
 	/* Held cmds will be after the current cmd in the queue so start
 	 * searching for a cmd with a matching tfm for submission.
 	 */
 	tmp = crypto_cmd;
-	list_for_each_entry_continue(tmp, &cpu_queue->cmds, entry) {
+	list_for_each_entry_continue(tmp, &req_queue.cmds, entry) {
 		if (crypto_cmd->tfm != tmp->tfm)
 			continue;
 		held = tmp;
@@ -111,47 +111,45 @@
 	 *   Because cmds can be executed from any point in the cmd list
 	 *   special precautions have to be taken when handling the backlog.
 	 */
-	if (cpu_queue->backlog != &cpu_queue->cmds) {
+	if (req_queue.backlog != &req_queue.cmds) {
 		/* Skip over this cmd if it is the next backlog cmd */
-		if (cpu_queue->backlog == &crypto_cmd->entry)
-			cpu_queue->backlog = crypto_cmd->entry.next;
+		if (req_queue.backlog == &crypto_cmd->entry)
+			req_queue.backlog = crypto_cmd->entry.next;
 
-		*backlog = container_of(cpu_queue->backlog,
+		*backlog = container_of(req_queue.backlog,
 					struct ccp_crypto_cmd, entry);
-		cpu_queue->backlog = cpu_queue->backlog->next;
+		req_queue.backlog = req_queue.backlog->next;
 
 		/* Skip over this cmd if it is now the next backlog cmd */
-		if (cpu_queue->backlog == &crypto_cmd->entry)
-			cpu_queue->backlog = crypto_cmd->entry.next;
+		if (req_queue.backlog == &crypto_cmd->entry)
+			req_queue.backlog = crypto_cmd->entry.next;
 	}
 
 	/* Remove the cmd entry from the list of cmds */
-	cpu_queue->cmd_count--;
+	req_queue.cmd_count--;
 	list_del(&crypto_cmd->entry);
 
+	spin_unlock_irqrestore(&req_queue_lock, flags);
+
 	return held;
 }
 
-static void ccp_crypto_complete_on_cpu(struct work_struct *work)
+static void ccp_crypto_complete(void *data, int err)
 {
-	struct ccp_crypto_cpu *cpu_work =
-		container_of(work, struct ccp_crypto_cpu, work);
-	struct ccp_crypto_cmd *crypto_cmd = cpu_work->crypto_cmd;
+	struct ccp_crypto_cmd *crypto_cmd = data;
 	struct ccp_crypto_cmd *held, *next, *backlog;
 	struct crypto_async_request *req = crypto_cmd->req;
 	struct ccp_ctx *ctx = crypto_tfm_ctx(req->tfm);
-	int cpu, ret;
+	int ret;
 
-	cpu = get_cpu();
-
-	if (cpu_work->err == -EINPROGRESS) {
+	if (err == -EINPROGRESS) {
 		/* Only propagate the -EINPROGRESS if necessary */
 		if (crypto_cmd->ret == -EBUSY) {
 			crypto_cmd->ret = -EINPROGRESS;
 			req->complete(req, -EINPROGRESS);
 		}
 
-		goto e_cpu;
+		return;
 	}
 
 	/* Operation has completed - update the queue before invoking
@@ -169,18 +167,25 @@
 		req->complete(req, -EINPROGRESS);
 
 	/* Completion callbacks */
-	ret = cpu_work->err;
+	ret = err;
 	if (ctx->complete)
 		ret = ctx->complete(req, ret);
 	req->complete(req, ret);
 
 	/* Submit the next cmd */
 	while (held) {
+		/* Since we have already queued the cmd, we must indicate that
+		 * we can backlog so as not to "lose" this request.
+		 */
+		held->cmd->flags |= CCP_CMD_MAY_BACKLOG;
 		ret = ccp_enqueue_cmd(held->cmd);
 		if (ccp_crypto_success(ret))
 			break;
 
 		/* Error occurred, report it and get the next entry */
+		ctx = crypto_tfm_ctx(held->req->tfm);
+		if (ctx->complete)
+			ret = ctx->complete(held->req, ret);
 		held->req->complete(held->req, ret);
 
 		next = ccp_crypto_cmd_complete(held, &backlog);
@@ -194,52 +199,29 @@
 	}
 
 	kfree(crypto_cmd);
-
-e_cpu:
-	put_cpu();
-
-	complete(&cpu_work->completion);
-}
-
-static void ccp_crypto_complete(void *data, int err)
-{
-	struct ccp_crypto_cmd *crypto_cmd = data;
-	struct ccp_crypto_cpu cpu_work;
-
-	INIT_WORK(&cpu_work.work, ccp_crypto_complete_on_cpu);
-	init_completion(&cpu_work.completion);
-	cpu_work.crypto_cmd = crypto_cmd;
-	cpu_work.err = err;
-
-	schedule_work_on(crypto_cmd->cpu, &cpu_work.work);
-
-	/* Keep the completion call synchronous */
-	wait_for_completion(&cpu_work.completion);
 }
 
 static int ccp_crypto_enqueue_cmd(struct ccp_crypto_cmd *crypto_cmd)
 {
-	struct ccp_crypto_cpu_queue *cpu_queue;
 	struct ccp_crypto_cmd *active = NULL, *tmp;
-	int cpu, ret;
+	unsigned long flags;
+	bool free_cmd = true;
+	int ret;
 
-	cpu = get_cpu();
-	crypto_cmd->cpu = cpu;
-
-	cpu_queue = this_cpu_ptr(req_queue.cpu_queue);
+	spin_lock_irqsave(&req_queue_lock, flags);
 
 	/* Check if the cmd can/should be queued */
-	if (cpu_queue->cmd_count >= CCP_CRYPTO_MAX_QLEN) {
+	if (req_queue.cmd_count >= CCP_CRYPTO_MAX_QLEN) {
 		ret = -EBUSY;
 		if (!(crypto_cmd->cmd->flags & CCP_CMD_MAY_BACKLOG))
-			goto e_cpu;
+			goto e_lock;
 	}
 
 	/* Look for an entry with the same tfm.  If there is a cmd
-	 * with the same tfm in the list for this cpu then the current
-	 * cmd cannot be submitted to the CCP yet.
+	 * with the same tfm in the list then the current cmd cannot
+	 * be submitted to the CCP yet.
 	 */
-	list_for_each_entry(tmp, &cpu_queue->cmds, entry) {
+	list_for_each_entry(tmp, &req_queue.cmds, entry) {
 		if (crypto_cmd->tfm != tmp->tfm)
 			continue;
 		active = tmp;
@@ -250,21 +232,29 @@
 	if (!active) {
 		ret = ccp_enqueue_cmd(crypto_cmd->cmd);
 		if (!ccp_crypto_success(ret))
-			goto e_cpu;
+			goto e_lock;	/* Error, don't queue it */
+		if ((ret == -EBUSY) &&
+		    !(crypto_cmd->cmd->flags & CCP_CMD_MAY_BACKLOG))
+			goto e_lock;	/* Not backlogging, don't queue it */
 	}
 
-	if (cpu_queue->cmd_count >= CCP_CRYPTO_MAX_QLEN) {
+	if (req_queue.cmd_count >= CCP_CRYPTO_MAX_QLEN) {
 		ret = -EBUSY;
-		if (cpu_queue->backlog == &cpu_queue->cmds)
-			cpu_queue->backlog = &crypto_cmd->entry;
+		if (req_queue.backlog == &req_queue.cmds)
+			req_queue.backlog = &crypto_cmd->entry;
 	}
 	crypto_cmd->ret = ret;
 
-	cpu_queue->cmd_count++;
-	list_add_tail(&crypto_cmd->entry, &cpu_queue->cmds);
+	req_queue.cmd_count++;
+	list_add_tail(&crypto_cmd->entry, &req_queue.cmds);
 
-e_cpu:
-	put_cpu();
+	free_cmd = false;
+
+e_lock:
+	spin_unlock_irqrestore(&req_queue_lock, flags);
+
+	if (free_cmd)
+		kfree(crypto_cmd);
 
 	return ret;
 }
@@ -281,7 +271,6 @@
 {
 	struct ccp_crypto_cmd *crypto_cmd;
 	gfp_t gfp;
-	int ret;
 
 	gfp = req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC;
 
@@ -306,11 +295,7 @@
 	else
 		cmd->flags &= ~CCP_CMD_MAY_BACKLOG;
 
-	ret = ccp_crypto_enqueue_cmd(crypto_cmd);
-	if (!ccp_crypto_success(ret))
-		kfree(crypto_cmd);
-
-	return ret;
+	return ccp_crypto_enqueue_cmd(crypto_cmd);
 }
 
 struct scatterlist *ccp_crypto_sg_table_add(struct sg_table *table,
@@ -337,21 +322,25 @@
 {
 	int ret;
 
-	ret = ccp_register_aes_algs(&cipher_algs);
-	if (ret)
-		return ret;
+	if (!aes_disable) {
+		ret = ccp_register_aes_algs(&cipher_algs);
+		if (ret)
+			return ret;
 
-	ret = ccp_register_aes_cmac_algs(&hash_algs);
-	if (ret)
-		return ret;
+		ret = ccp_register_aes_cmac_algs(&hash_algs);
+		if (ret)
+			return ret;
 
-	ret = ccp_register_aes_xts_algs(&cipher_algs);
-	if (ret)
-		return ret;
+		ret = ccp_register_aes_xts_algs(&cipher_algs);
+		if (ret)
+			return ret;
+	}
 
-	ret = ccp_register_sha_algs(&hash_algs);
-	if (ret)
-		return ret;
+	if (!sha_disable) {
+		ret = ccp_register_sha_algs(&hash_algs);
+		if (ret)
+			return ret;
+	}
 
 	return 0;
 }
@@ -374,50 +363,18 @@
 	}
 }
 
-static int ccp_init_queues(void)
-{
-	struct ccp_crypto_cpu_queue *cpu_queue;
-	int cpu;
-
-	req_queue.cpu_queue = alloc_percpu(struct ccp_crypto_cpu_queue);
-	if (!req_queue.cpu_queue)
-		return -ENOMEM;
-
-	for_each_possible_cpu(cpu) {
-		cpu_queue = per_cpu_ptr(req_queue.cpu_queue, cpu);
-		INIT_LIST_HEAD(&cpu_queue->cmds);
-		cpu_queue->backlog = &cpu_queue->cmds;
-		cpu_queue->cmd_count = 0;
-	}
-
-	return 0;
-}
-
-static void ccp_fini_queue(void)
-{
-	struct ccp_crypto_cpu_queue *cpu_queue;
-	int cpu;
-
-	for_each_possible_cpu(cpu) {
-		cpu_queue = per_cpu_ptr(req_queue.cpu_queue, cpu);
-		BUG_ON(!list_empty(&cpu_queue->cmds));
-	}
-	free_percpu(req_queue.cpu_queue);
-}
-
 static int ccp_crypto_init(void)
 {
 	int ret;
 
-	ret = ccp_init_queues();
-	if (ret)
-		return ret;
+	spin_lock_init(&req_queue_lock);
+	INIT_LIST_HEAD(&req_queue.cmds);
+	req_queue.backlog = &req_queue.cmds;
+	req_queue.cmd_count = 0;
 
 	ret = ccp_register_algs();
-	if (ret) {
+	if (ret)
 		ccp_unregister_algs();
-		ccp_fini_queue();
-	}
 
 	return ret;
 }
@@ -425,7 +382,6 @@
 static void ccp_crypto_exit(void)
 {
 	ccp_unregister_algs();
-	ccp_fini_queue();
 }
 
 module_init(ccp_crypto_init);
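
The queue rework above drops the per-cpu machinery because the ordering requirement is per-tfm, not per-cpu: one global list under an IRQ-safe spinlock is sufficient, and it removes both the get_cpu()/put_cpu() pinning and the synchronous bounce of every completion back to the submitting CPU. The locking pattern, reduced to a minimal sketch with hypothetical names:

    #include <linux/list.h>
    #include <linux/spinlock.h>

    static LIST_HEAD(cmds);
    static DEFINE_SPINLOCK(cmds_lock);

    static void cmd_enqueue(struct list_head *entry)
    {
            unsigned long flags;

            /* irqsave form: submitters run in process context while
             * completion handling may arrive from softirq context */
            spin_lock_irqsave(&cmds_lock, flags);
            list_add_tail(entry, &cmds);
            spin_unlock_irqrestore(&cmds_lock, flags);
    }

(The merged code initialises its lock at runtime with spin_lock_init(); DEFINE_SPINLOCK() is the static-initialiser equivalent.)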
diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c
index 3867290..873f234 100644
--- a/drivers/crypto/ccp/ccp-crypto-sha.c
+++ b/drivers/crypto/ccp/ccp-crypto-sha.c
@@ -24,75 +24,10 @@
 #include "ccp-crypto.h"
 
 
-struct ccp_sha_result {
-	struct completion completion;
-	int err;
-};
-
-static void ccp_sync_hash_complete(struct crypto_async_request *req, int err)
-{
-	struct ccp_sha_result *result = req->data;
-
-	if (err == -EINPROGRESS)
-		return;
-
-	result->err = err;
-	complete(&result->completion);
-}
-
-static int ccp_sync_hash(struct crypto_ahash *tfm, u8 *buf,
-			 struct scatterlist *sg, unsigned int len)
-{
-	struct ccp_sha_result result;
-	struct ahash_request *req;
-	int ret;
-
-	init_completion(&result.completion);
-
-	req = ahash_request_alloc(tfm, GFP_KERNEL);
-	if (!req)
-		return -ENOMEM;
-
-	ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
-				   ccp_sync_hash_complete, &result);
-	ahash_request_set_crypt(req, sg, buf, len);
-
-	ret = crypto_ahash_digest(req);
-	if ((ret == -EINPROGRESS) || (ret == -EBUSY)) {
-		ret = wait_for_completion_interruptible(&result.completion);
-		if (!ret)
-			ret = result.err;
-	}
-
-	ahash_request_free(req);
-
-	return ret;
-}
-
-static int ccp_sha_finish_hmac(struct crypto_async_request *async_req)
-{
-	struct ahash_request *req = ahash_request_cast(async_req);
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct ccp_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req);
-	struct scatterlist sg[2];
-	unsigned int block_size =
-		crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm));
-	unsigned int digest_size = crypto_ahash_digestsize(tfm);
-
-	sg_init_table(sg, ARRAY_SIZE(sg));
-	sg_set_buf(&sg[0], ctx->u.sha.opad, block_size);
-	sg_set_buf(&sg[1], rctx->ctx, digest_size);
-
-	return ccp_sync_hash(ctx->u.sha.hmac_tfm, req->result, sg,
-			     block_size + digest_size);
-}
-
 static int ccp_sha_complete(struct crypto_async_request *async_req, int ret)
 {
 	struct ahash_request *req = ahash_request_cast(async_req);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct ccp_ctx *ctx = crypto_ahash_ctx(tfm);
 	struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req);
 	unsigned int digest_size = crypto_ahash_digestsize(tfm);
 
@@ -112,10 +47,6 @@
 	if (req->result)
 		memcpy(req->result, rctx->ctx, digest_size);
 
-	/* If we're doing an HMAC, we need to perform that on the final op */
-	if (rctx->final && ctx->u.sha.key_len)
-		ret = ccp_sha_finish_hmac(async_req);
-
 e_free:
 	sg_free_table(&rctx->data_sg);
 
@@ -126,6 +57,7 @@
 			     unsigned int final)
 {
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct ccp_ctx *ctx = crypto_ahash_ctx(tfm);
 	struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req);
 	struct scatterlist *sg;
 	unsigned int block_size =
@@ -196,6 +128,11 @@
 	rctx->cmd.u.sha.ctx_len = sizeof(rctx->ctx);
 	rctx->cmd.u.sha.src = sg;
 	rctx->cmd.u.sha.src_len = rctx->hash_cnt;
+	rctx->cmd.u.sha.opad = ctx->u.sha.key_len ?
+		&ctx->u.sha.opad_sg : NULL;
+	rctx->cmd.u.sha.opad_len = ctx->u.sha.key_len ?
+		ctx->u.sha.opad_count : 0;
+	rctx->cmd.u.sha.first = rctx->first;
 	rctx->cmd.u.sha.final = rctx->final;
 	rctx->cmd.u.sha.msg_bits = rctx->msg_bits;
 
@@ -218,7 +155,6 @@
 
 	memset(rctx, 0, sizeof(*rctx));
 
-	memcpy(rctx->ctx, alg->init, sizeof(rctx->ctx));
 	rctx->type = alg->type;
 	rctx->first = 1;
 
@@ -261,10 +197,13 @@
 			  unsigned int key_len)
 {
 	struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm));
-	struct scatterlist sg;
-	unsigned int block_size =
-		crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm));
-	unsigned int digest_size = crypto_ahash_digestsize(tfm);
+	struct crypto_shash *shash = ctx->u.sha.hmac_tfm;
+	struct {
+		struct shash_desc sdesc;
+		char ctx[crypto_shash_descsize(shash)];
+	} desc;
+	unsigned int block_size = crypto_shash_blocksize(shash);
+	unsigned int digest_size = crypto_shash_digestsize(shash);
 	int i, ret;
 
 	/* Set to zero until complete */
@@ -277,8 +216,12 @@
 
 	if (key_len > block_size) {
 		/* Must hash the input key */
-		sg_init_one(&sg, key, key_len);
-		ret = ccp_sync_hash(tfm, ctx->u.sha.key, &sg, key_len);
+		desc.sdesc.tfm = shash;
+		desc.sdesc.flags = crypto_ahash_get_flags(tfm) &
+			CRYPTO_TFM_REQ_MAY_SLEEP;
+
+		ret = crypto_shash_digest(&desc.sdesc, key, key_len,
+					  ctx->u.sha.key);
 		if (ret) {
 			crypto_ahash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
 			return -EINVAL;
@@ -293,6 +236,9 @@
 		ctx->u.sha.opad[i] = ctx->u.sha.key[i] ^ 0x5c;
 	}
 
+	sg_init_one(&ctx->u.sha.opad_sg, ctx->u.sha.opad, block_size);
+	ctx->u.sha.opad_count = block_size;
+
 	ctx->u.sha.key_len = key_len;
 
 	return 0;
@@ -319,10 +265,9 @@
 {
 	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
 	struct ccp_crypto_ahash_alg *alg = ccp_crypto_ahash_alg(tfm);
-	struct crypto_ahash *hmac_tfm;
+	struct crypto_shash *hmac_tfm;
 
-	hmac_tfm = crypto_alloc_ahash(alg->child_alg,
-				      CRYPTO_ALG_TYPE_AHASH, 0);
+	hmac_tfm = crypto_alloc_shash(alg->child_alg, 0, 0);
 	if (IS_ERR(hmac_tfm)) {
 		pr_warn("could not load driver %s need for HMAC support\n",
 			alg->child_alg);
@@ -339,35 +284,14 @@
 	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
 
 	if (ctx->u.sha.hmac_tfm)
-		crypto_free_ahash(ctx->u.sha.hmac_tfm);
+		crypto_free_shash(ctx->u.sha.hmac_tfm);
 
 	ccp_sha_cra_exit(tfm);
 }
 
-static const __be32 sha1_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
-	cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1),
-	cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3),
-	cpu_to_be32(SHA1_H4), 0, 0, 0,
-};
-
-static const __be32 sha224_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
-	cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1),
-	cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3),
-	cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5),
-	cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7),
-};
-
-static const __be32 sha256_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
-	cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1),
-	cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3),
-	cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5),
-	cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7),
-};
-
 struct ccp_sha_def {
 	const char *name;
 	const char *drv_name;
-	const __be32 *init;
 	enum ccp_sha_type type;
 	u32 digest_size;
 	u32 block_size;
@@ -377,7 +301,6 @@
 	{
 		.name		= "sha1",
 		.drv_name	= "sha1-ccp",
-		.init		= sha1_init,
 		.type		= CCP_SHA_TYPE_1,
 		.digest_size	= SHA1_DIGEST_SIZE,
 		.block_size	= SHA1_BLOCK_SIZE,
@@ -385,7 +308,6 @@
 	{
 		.name		= "sha224",
 		.drv_name	= "sha224-ccp",
-		.init		= sha224_init,
 		.type		= CCP_SHA_TYPE_224,
 		.digest_size	= SHA224_DIGEST_SIZE,
 		.block_size	= SHA224_BLOCK_SIZE,
@@ -393,7 +315,6 @@
 	{
 		.name		= "sha256",
 		.drv_name	= "sha256-ccp",
-		.init		= sha256_init,
 		.type		= CCP_SHA_TYPE_256,
 		.digest_size	= SHA256_DIGEST_SIZE,
 		.block_size	= SHA256_BLOCK_SIZE,
@@ -460,7 +381,6 @@
 
 	INIT_LIST_HEAD(&ccp_alg->entry);
 
-	ccp_alg->init = def->init;
 	ccp_alg->type = def->type;
 
 	alg = &ccp_alg->alg;
diff --git a/drivers/crypto/ccp/ccp-crypto.h b/drivers/crypto/ccp/ccp-crypto.h
index b222231..9aa4ae1 100644
--- a/drivers/crypto/ccp/ccp-crypto.h
+++ b/drivers/crypto/ccp/ccp-crypto.h
@@ -137,11 +137,14 @@
 #define MAX_SHA_BLOCK_SIZE	SHA256_BLOCK_SIZE
 
 struct ccp_sha_ctx {
+	struct scatterlist opad_sg;
+	unsigned int opad_count;
+
 	unsigned int key_len;
 	u8 key[MAX_SHA_BLOCK_SIZE];
 	u8 ipad[MAX_SHA_BLOCK_SIZE];
 	u8 opad[MAX_SHA_BLOCK_SIZE];
-	struct crypto_ahash *hmac_tfm;
+	struct crypto_shash *hmac_tfm;
 };
 
 struct ccp_sha_req_ctx {
@@ -167,9 +170,6 @@
 	unsigned int buf_count;
 	u8 buf[MAX_SHA_BLOCK_SIZE];
 
-	/* HMAC support field */
-	struct scatterlist pad_sg;
-
 	/* CCP driver command */
 	struct ccp_cmd cmd;
 };
diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c
index c3bc212..2c78161 100644
--- a/drivers/crypto/ccp/ccp-dev.c
+++ b/drivers/crypto/ccp/ccp-dev.c
@@ -30,6 +30,11 @@
 MODULE_VERSION("1.0.0");
 MODULE_DESCRIPTION("AMD Cryptographic Coprocessor driver");
 
+struct ccp_tasklet_data {
+	struct completion completion;
+	struct ccp_cmd *cmd;
+};
+
 
 static struct ccp_device *ccp_dev;
 static inline struct ccp_device *ccp_get_device(void)
@@ -192,17 +197,23 @@
 	return cmd;
 }
 
-static void ccp_do_cmd_complete(struct work_struct *work)
+static void ccp_do_cmd_complete(unsigned long data)
 {
-	struct ccp_cmd *cmd = container_of(work, struct ccp_cmd, work);
+	struct ccp_tasklet_data *tdata = (struct ccp_tasklet_data *)data;
+	struct ccp_cmd *cmd = tdata->cmd;
 
 	cmd->callback(cmd->data, cmd->ret);
+	complete(&tdata->completion);
 }
 
 static int ccp_cmd_queue_thread(void *data)
 {
 	struct ccp_cmd_queue *cmd_q = (struct ccp_cmd_queue *)data;
 	struct ccp_cmd *cmd;
+	struct ccp_tasklet_data tdata;
+	struct tasklet_struct tasklet;
+
+	tasklet_init(&tasklet, ccp_do_cmd_complete, (unsigned long)&tdata);
 
 	set_current_state(TASK_INTERRUPTIBLE);
 	while (!kthread_should_stop()) {
@@ -220,8 +231,10 @@
 		cmd->ret = ccp_run_cmd(cmd_q, cmd);
 
 		/* Schedule the completion callback */
-		INIT_WORK(&cmd->work, ccp_do_cmd_complete);
-		schedule_work(&cmd->work);
+		tdata.cmd = cmd;
+		init_completion(&tdata.completion);
+		tasklet_schedule(&tasklet);
+		wait_for_completion(&tdata.completion);
 	}
 
 	__set_current_state(TASK_RUNNING);
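
Replacing the per-command work item with one long-lived tasklet moves the completion callback into softirq context, and the init_completion()/wait_for_completion() pair keeps the queue thread synchronous with it: the next command is not fetched until the callback has run, preserving ordering. The shape of the pattern, as a sketch:

    #include <linux/completion.h>
    #include <linux/interrupt.h>

    struct cb_data {
            struct completion completion;
    };

    /* tasklet body: runs in softirq context */
    static void run_cb(unsigned long data)
    {
            struct cb_data *d = (struct cb_data *)data;

            /* invoke the command's callback here, then signal */
            complete(&d->completion);
    }

    /* queue-thread side, once per command */
    static void complete_one(struct cb_data *d)
    {
            struct tasklet_struct t;

            tasklet_init(&t, run_cb, (unsigned long)d);
            init_completion(&d->completion);
            tasklet_schedule(&t);
            /* safe to let t go out of scope: it has run by now */
            wait_for_completion(&d->completion);
    }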
diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c
index 71ed3ad..9ae006d 100644
--- a/drivers/crypto/ccp/ccp-ops.c
+++ b/drivers/crypto/ccp/ccp-ops.c
@@ -23,6 +23,7 @@
 #include <linux/ccp.h>
 #include <linux/scatterlist.h>
 #include <crypto/scatterwalk.h>
+#include <crypto/sha.h>
 
 #include "ccp-dev.h"
 
@@ -132,6 +133,27 @@
 	} u;
 };
 
+/* SHA initial context values */
+static const __be32 ccp_sha1_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
+	cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1),
+	cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3),
+	cpu_to_be32(SHA1_H4), 0, 0, 0,
+};
+
+static const __be32 ccp_sha224_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
+	cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1),
+	cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3),
+	cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5),
+	cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7),
+};
+
+static const __be32 ccp_sha256_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
+	cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1),
+	cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3),
+	cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5),
+	cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7),
+};
+
 /* The CCP cannot perform zero-length sha operations so the caller
  * is required to buffer data for the final operation.  However, a
  * sha operation for a message with a total length of zero is valid
@@ -1411,7 +1433,27 @@
 	if (ret)
 		return ret;
 
-	ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
+	if (sha->first) {
+		const __be32 *init;
+
+		switch (sha->type) {
+		case CCP_SHA_TYPE_1:
+			init = ccp_sha1_init;
+			break;
+		case CCP_SHA_TYPE_224:
+			init = ccp_sha224_init;
+			break;
+		case CCP_SHA_TYPE_256:
+			init = ccp_sha256_init;
+			break;
+		default:
+			ret = -EINVAL;
+			goto e_ctx;
+		}
+		memcpy(ctx.address, init, CCP_SHA_CTXSIZE);
+	} else
+		ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
+
 	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
 			      CCP_PASSTHRU_BYTESWAP_256BIT);
 	if (ret) {
@@ -1451,6 +1493,66 @@
 
 	ccp_get_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
 
+	if (sha->final && sha->opad) {
+		/* HMAC operation, recursively perform final SHA */
+		struct ccp_cmd hmac_cmd;
+		struct scatterlist sg;
+		u64 block_size, digest_size;
+		u8 *hmac_buf;
+
+		switch (sha->type) {
+		case CCP_SHA_TYPE_1:
+			block_size = SHA1_BLOCK_SIZE;
+			digest_size = SHA1_DIGEST_SIZE;
+			break;
+		case CCP_SHA_TYPE_224:
+			block_size = SHA224_BLOCK_SIZE;
+			digest_size = SHA224_DIGEST_SIZE;
+			break;
+		case CCP_SHA_TYPE_256:
+			block_size = SHA256_BLOCK_SIZE;
+			digest_size = SHA256_DIGEST_SIZE;
+			break;
+		default:
+			ret = -EINVAL;
+			goto e_data;
+		}
+
+		if (sha->opad_len != block_size) {
+			ret = -EINVAL;
+			goto e_data;
+		}
+
+		hmac_buf = kmalloc(block_size + digest_size, GFP_KERNEL);
+		if (!hmac_buf) {
+			ret = -ENOMEM;
+			goto e_data;
+		}
+		sg_init_one(&sg, hmac_buf, block_size + digest_size);
+
+		scatterwalk_map_and_copy(hmac_buf, sha->opad, 0, block_size, 0);
+		memcpy(hmac_buf + block_size, ctx.address, digest_size);
+
+		memset(&hmac_cmd, 0, sizeof(hmac_cmd));
+		hmac_cmd.engine = CCP_ENGINE_SHA;
+		hmac_cmd.u.sha.type = sha->type;
+		hmac_cmd.u.sha.ctx = sha->ctx;
+		hmac_cmd.u.sha.ctx_len = sha->ctx_len;
+		hmac_cmd.u.sha.src = &sg;
+		hmac_cmd.u.sha.src_len = block_size + digest_size;
+		hmac_cmd.u.sha.opad = NULL;
+		hmac_cmd.u.sha.opad_len = 0;
+		hmac_cmd.u.sha.first = 1;
+		hmac_cmd.u.sha.final = 1;
+		hmac_cmd.u.sha.msg_bits = (block_size + digest_size) << 3;
+
+		ret = ccp_run_sha_cmd(cmd_q, &hmac_cmd);
+		if (ret)
+			cmd->engine_error = hmac_cmd.engine_error;
+
+		kfree(hmac_buf);
+	}
+
 e_data:
 	ccp_free_data(&src, cmd_q);
 
@@ -1666,8 +1768,8 @@
 
 		op.dst.type = CCP_MEMTYPE_SYSTEM;
 		op.dst.u.dma.address = sg_dma_address(dst.sg_wa.sg);
-		op.src.u.dma.offset = dst.sg_wa.sg_used;
-		op.src.u.dma.length = op.src.u.dma.length;
+		op.dst.u.dma.offset = dst.sg_wa.sg_used;
+		op.dst.u.dma.length = op.src.u.dma.length;
 
 		ret = ccp_perform_passthru(&op);
 		if (ret) {
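
The new opad/first plumbing moves HMAC finalisation entirely into the engine path: when the last SHA pass of a keyed hash completes, ccp_run_sha_cmd() recurses exactly once to compute the outer hash. In HMAC terms (^ is XOR against the block-sized key, || is concatenation):

    HMAC(K, m) = H((K ^ opad) || H((K ^ ipad) || m))

hmac_buf holds exactly (K ^ opad) || inner_digest, i.e. block_size + digest_size bytes, which is why the recursive command sets msg_bits to (block_size + digest_size) << 3. The final hunk of this file is an independent bug fix: the passthru destination offset and length were being stored into op.src instead of op.dst.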
diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c
index a6db7fa..7bbe0ab 100644
--- a/drivers/crypto/mxs-dcp.c
+++ b/drivers/crypto/mxs-dcp.c
@@ -29,6 +29,8 @@
 #define DCP_MAX_CHANS	4
 #define DCP_BUF_SZ	PAGE_SIZE
 
+#define DCP_ALIGNMENT	64
+
 /* DCP DMA descriptor. */
 struct dcp_dma_desc {
 	uint32_t	next_cmd_addr;
@@ -48,7 +50,6 @@
 	uint8_t			sha_in_buf[DCP_BUF_SZ];
 
 	uint8_t			aes_key[2 * AES_KEYSIZE_128];
-	uint8_t			sha_digest[SHA256_DIGEST_SIZE];
 
 	struct dcp_dma_desc	desc[DCP_MAX_CHANS];
 };
@@ -83,13 +84,16 @@
 	unsigned int			hot:1;
 
 	/* Crypto-specific context */
-	unsigned int			enc:1;
-	unsigned int			ecb:1;
 	struct crypto_ablkcipher	*fallback;
 	unsigned int			key_len;
 	uint8_t				key[AES_KEYSIZE_128];
 };
 
+struct dcp_aes_req_ctx {
+	unsigned int	enc:1;
+	unsigned int	ecb:1;
+};
+
 struct dcp_sha_req_ctx {
 	unsigned int	init:1;
 	unsigned int	fini:1;
@@ -190,10 +194,12 @@
 /*
  * Encryption (AES128)
  */
-static int mxs_dcp_run_aes(struct dcp_async_ctx *actx, int init)
+static int mxs_dcp_run_aes(struct dcp_async_ctx *actx,
+			   struct ablkcipher_request *req, int init)
 {
 	struct dcp *sdcp = global_sdcp;
 	struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan];
+	struct dcp_aes_req_ctx *rctx = ablkcipher_request_ctx(req);
 	int ret;
 
 	dma_addr_t key_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_key,
@@ -212,14 +218,14 @@
 	/* Payload contains the key. */
 	desc->control0 |= MXS_DCP_CONTROL0_PAYLOAD_KEY;
 
-	if (actx->enc)
+	if (rctx->enc)
 		desc->control0 |= MXS_DCP_CONTROL0_CIPHER_ENCRYPT;
 	if (init)
 		desc->control0 |= MXS_DCP_CONTROL0_CIPHER_INIT;
 
 	desc->control1 = MXS_DCP_CONTROL1_CIPHER_SELECT_AES128;
 
-	if (actx->ecb)
+	if (rctx->ecb)
 		desc->control1 |= MXS_DCP_CONTROL1_CIPHER_MODE_ECB;
 	else
 		desc->control1 |= MXS_DCP_CONTROL1_CIPHER_MODE_CBC;
@@ -247,6 +253,7 @@
 
 	struct ablkcipher_request *req = ablkcipher_request_cast(arq);
 	struct dcp_async_ctx *actx = crypto_tfm_ctx(arq->tfm);
+	struct dcp_aes_req_ctx *rctx = ablkcipher_request_ctx(req);
 
 	struct scatterlist *dst = req->dst;
 	struct scatterlist *src = req->src;
@@ -271,7 +278,7 @@
 	/* Copy the key from the temporary location. */
 	memcpy(key, actx->key, actx->key_len);
 
-	if (!actx->ecb) {
+	if (!rctx->ecb) {
 		/* Copy the CBC IV just past the key. */
 		memcpy(key + AES_KEYSIZE_128, req->info, AES_KEYSIZE_128);
 		/* CBC needs the INIT set. */
@@ -300,7 +307,7 @@
 			 * submit the buffer.
 			 */
 			if (actx->fill == out_off || sg_is_last(src)) {
-				ret = mxs_dcp_run_aes(actx, init);
+				ret = mxs_dcp_run_aes(actx, req, init);
 				if (ret)
 					return ret;
 				init = 0;
@@ -391,13 +398,14 @@
 	struct dcp *sdcp = global_sdcp;
 	struct crypto_async_request *arq = &req->base;
 	struct dcp_async_ctx *actx = crypto_tfm_ctx(arq->tfm);
+	struct dcp_aes_req_ctx *rctx = ablkcipher_request_ctx(req);
 	int ret;
 
 	if (unlikely(actx->key_len != AES_KEYSIZE_128))
 		return mxs_dcp_block_fallback(req, enc);
 
-	actx->enc = enc;
-	actx->ecb = ecb;
+	rctx->enc = enc;
+	rctx->ecb = ecb;
 	actx->chan = DCP_CHAN_CRYPTO;
 
 	mutex_lock(&sdcp->mutex[actx->chan]);
@@ -484,7 +492,7 @@
 		return PTR_ERR(blk);
 
 	actx->fallback = blk;
-	tfm->crt_ablkcipher.reqsize = sizeof(struct dcp_async_ctx);
+	tfm->crt_ablkcipher.reqsize = sizeof(struct dcp_aes_req_ctx);
 	return 0;
 }
 
@@ -507,13 +515,11 @@
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
 	struct dcp_async_ctx *actx = crypto_ahash_ctx(tfm);
 	struct dcp_sha_req_ctx *rctx = ahash_request_ctx(req);
+	struct hash_alg_common *halg = crypto_hash_alg_common(tfm);
 
 	struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan];
-	dma_addr_t digest_phys = dma_map_single(sdcp->dev,
-						sdcp->coh->sha_digest,
-						SHA256_DIGEST_SIZE,
-						DMA_FROM_DEVICE);
 
+	dma_addr_t digest_phys = 0;
 	dma_addr_t buf_phys = dma_map_single(sdcp->dev, sdcp->coh->sha_in_buf,
 					     DCP_BUF_SZ, DMA_TO_DEVICE);
 
@@ -534,14 +540,18 @@
 
 	/* Set HASH_TERM bit for last transfer block. */
 	if (rctx->fini) {
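+		/* The engine writes the final digest straight into req->result. */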
+		digest_phys = dma_map_single(sdcp->dev, req->result,
+					     halg->digestsize, DMA_FROM_DEVICE);
 		desc->control0 |= MXS_DCP_CONTROL0_HASH_TERM;
 		desc->payload = digest_phys;
 	}
 
 	ret = mxs_dcp_start_dma(actx);
 
-	dma_unmap_single(sdcp->dev, digest_phys, SHA256_DIGEST_SIZE,
-			 DMA_FROM_DEVICE);
+	if (rctx->fini)
+		dma_unmap_single(sdcp->dev, digest_phys, halg->digestsize,
+				 DMA_FROM_DEVICE);
+
 	dma_unmap_single(sdcp->dev, buf_phys, DCP_BUF_SZ, DMA_TO_DEVICE);
 
 	return ret;
@@ -558,7 +568,6 @@
 	struct hash_alg_common *halg = crypto_hash_alg_common(tfm);
 	const int nents = sg_nents(req->src);
 
-	uint8_t *digest = sdcp->coh->sha_digest;
 	uint8_t *in_buf = sdcp->coh->sha_in_buf;
 
 	uint8_t *src_buf;
@@ -605,14 +614,20 @@
 		rctx->fini = 1;
 
 		/* Submit whatever is left. */
+		if (!req->result)
+			return -EINVAL;
+
 		ret = mxs_dcp_run_sha(req);
-		if (ret || !req->result)
+		if (ret)
 			return ret;
+
 		actx->fill = 0;
 
 		/* For some reason, the result is flipped. */
-		for (i = 0; i < halg->digestsize; i++)
-			req->result[i] = digest[halg->digestsize - i - 1];
+		for (i = 0; i < halg->digestsize / 2; i++) {
+			swap(req->result[i],
+			     req->result[halg->digestsize - i - 1]);
+		}
 	}
 
 	return 0;
@@ -901,9 +916,14 @@
 
 	iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	dcp_vmi_irq = platform_get_irq(pdev, 0);
+	if (dcp_vmi_irq < 0) {
+		ret = dcp_vmi_irq;
+		goto err_mutex;
+	}
+
 	dcp_irq = platform_get_irq(pdev, 1);
-	if (dcp_vmi_irq < 0 || dcp_irq < 0) {
-		ret = -EINVAL;
+	if (dcp_irq < 0) {
+		ret = dcp_irq;
 		goto err_mutex;
 	}
 
@@ -935,15 +955,20 @@
 	}
 
 	/* Allocate coherent helper block. */
-	sdcp->coh = kzalloc(sizeof(struct dcp_coherent_block), GFP_KERNEL);
+	sdcp->coh = devm_kzalloc(dev, sizeof(*sdcp->coh) + DCP_ALIGNMENT,
+				   GFP_KERNEL);
 	if (!sdcp->coh) {
-		dev_err(dev, "Error allocating coherent block\n");
 		ret = -ENOMEM;
 		goto err_mutex;
 	}
 
+	/* Re-align the structure so it fits the DCP constraints. */
+	sdcp->coh = PTR_ALIGN(sdcp->coh, DCP_ALIGNMENT);
+
 	/* Restart the DCP block. */
-	stmp_reset_block(sdcp->base);
+	ret = stmp_reset_block(sdcp->base);
+	if (ret)
+		goto err_mutex;
 
 	/* Initialize control register. */
 	writel(MXS_DCP_CTRL_GATHER_RESIDUAL_WRITES |
@@ -982,7 +1007,7 @@
 	if (IS_ERR(sdcp->thread[DCP_CHAN_HASH_SHA])) {
 		dev_err(dev, "Error starting SHA thread!\n");
 		ret = PTR_ERR(sdcp->thread[DCP_CHAN_HASH_SHA]);
-		goto err_free_coherent;
+		goto err_mutex;
 	}
 
 	sdcp->thread[DCP_CHAN_CRYPTO] = kthread_run(dcp_chan_thread_aes,
@@ -1040,8 +1065,6 @@
 err_destroy_sha_thread:
 	kthread_stop(sdcp->thread[DCP_CHAN_HASH_SHA]);
 
-err_free_coherent:
-	kfree(sdcp->coh);
 err_mutex:
 	mutex_unlock(&global_mutex);
 	return ret;
@@ -1051,8 +1074,6 @@
 {
 	struct dcp *sdcp = platform_get_drvdata(pdev);
 
-	kfree(sdcp->coh);
-
 	if (sdcp->caps & MXS_DCP_CAPABILITY1_SHA256)
 		crypto_unregister_ahash(&dcp_sha256_alg);
 
diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c
index dde41f1d..cb98fa5 100644
--- a/drivers/crypto/omap-aes.c
+++ b/drivers/crypto/omap-aes.c
@@ -1307,9 +1307,7 @@
 }
 #endif
 
-static const struct dev_pm_ops omap_aes_pm_ops = {
-	SET_SYSTEM_SLEEP_PM_OPS(omap_aes_suspend, omap_aes_resume)
-};
+static SIMPLE_DEV_PM_OPS(omap_aes_pm_ops, omap_aes_suspend, omap_aes_resume);
 
 static struct platform_driver omap_aes_driver = {
 	.probe	= omap_aes_probe,
diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c
new file mode 100644
index 0000000..ec5f131
--- /dev/null
+++ b/drivers/crypto/omap-des.c
@@ -0,0 +1,1216 @@
+/*
+ * Support for OMAP DES and Triple DES HW acceleration.
+ *
+ * Copyright (c) 2013 Texas Instruments Incorporated
+ * Author: Joel Fernandes <joelf@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ */
+
+#define pr_fmt(fmt) "%s: " fmt, __func__
+
+#ifdef DEBUG
+#define prn(num) printk(#num "=%d\n", num)
+#define prx(num) printk(#num "=%x\n", num)
+#else
+#define prn(num) do { } while (0)
+#define prx(num)  do { } while (0)
+#endif
+
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/omap-dma.h>
+#include <linux/pm_runtime.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_address.h>
+#include <linux/io.h>
+#include <linux/crypto.h>
+#include <linux/interrupt.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/des.h>
+
+#define DST_MAXBURST			2
+
+#define DES_BLOCK_WORDS		(DES_BLOCK_SIZE >> 2)
+
+#define _calc_walked(inout) (dd->inout##_walk.offset - dd->inout##_sg->offset)
+
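+/*
+ * The (x ^ 0x01) swaps each pair of word indices so the key words are
+ * written in the order the engine expects.
+ */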
+#define DES_REG_KEY(dd, x)		((dd)->pdata->key_ofs - \
+						((x ^ 0x01) * 0x04))
+
+#define DES_REG_IV(dd, x)		((dd)->pdata->iv_ofs + ((x) * 0x04))
+
+#define DES_REG_CTRL(dd)		((dd)->pdata->ctrl_ofs)
+#define DES_REG_CTRL_CBC		BIT(4)
+#define DES_REG_CTRL_TDES		BIT(3)
+#define DES_REG_CTRL_DIRECTION		BIT(2)
+#define DES_REG_CTRL_INPUT_READY	BIT(1)
+#define DES_REG_CTRL_OUTPUT_READY	BIT(0)
+
+#define DES_REG_DATA_N(dd, x)		((dd)->pdata->data_ofs + ((x) * 0x04))
+
+#define DES_REG_REV(dd)			((dd)->pdata->rev_ofs)
+
+#define DES_REG_MASK(dd)		((dd)->pdata->mask_ofs)
+
+#define DES_REG_LENGTH_N(x)		(0x24 + ((x) * 0x04))
+
+#define DES_REG_IRQ_STATUS(dd)         ((dd)->pdata->irq_status_ofs)
+#define DES_REG_IRQ_ENABLE(dd)         ((dd)->pdata->irq_enable_ofs)
+#define DES_REG_IRQ_DATA_IN            BIT(1)
+#define DES_REG_IRQ_DATA_OUT           BIT(2)
+
+#define FLAGS_MODE_MASK		0x000f
+#define FLAGS_ENCRYPT		BIT(0)
+#define FLAGS_CBC		BIT(1)
+#define FLAGS_INIT		BIT(4)
+#define FLAGS_BUSY		BIT(6)
+
+struct omap_des_ctx {
+	struct omap_des_dev *dd;
+
+	int		keylen;
+	u32		key[(3 * DES_KEY_SIZE) / sizeof(u32)];
+	unsigned long	flags;
+};
+
+struct omap_des_reqctx {
+	unsigned long mode;
+};
+
+#define OMAP_DES_QUEUE_LENGTH	1
+#define OMAP_DES_CACHE_SIZE	0
+
+struct omap_des_algs_info {
+	struct crypto_alg	*algs_list;
+	unsigned int		size;
+	unsigned int		registered;
+};
+
+struct omap_des_pdata {
+	struct omap_des_algs_info	*algs_info;
+	unsigned int	algs_info_size;
+
+	void		(*trigger)(struct omap_des_dev *dd, int length);
+
+	u32		key_ofs;
+	u32		iv_ofs;
+	u32		ctrl_ofs;
+	u32		data_ofs;
+	u32		rev_ofs;
+	u32		mask_ofs;
+	u32             irq_enable_ofs;
+	u32             irq_status_ofs;
+
+	u32		dma_enable_in;
+	u32		dma_enable_out;
+	u32		dma_start;
+
+	u32		major_mask;
+	u32		major_shift;
+	u32		minor_mask;
+	u32		minor_shift;
+};
+
+struct omap_des_dev {
+	struct list_head	list;
+	unsigned long		phys_base;
+	void __iomem		*io_base;
+	struct omap_des_ctx	*ctx;
+	struct device		*dev;
+	unsigned long		flags;
+	int			err;
+
+	/* spinlock used for queues */
+	spinlock_t		lock;
+	struct crypto_queue	queue;
+
+	struct tasklet_struct	done_task;
+	struct tasklet_struct	queue_task;
+
+	struct ablkcipher_request	*req;
+	/*
+	 * total is consumed by PIO mode for bookkeeping, so keep a copy
+	 * in total_save, which is needed to calculate the page order
+	 */
+	size_t                          total;
+	size_t                          total_save;
+
+	struct scatterlist		*in_sg;
+	struct scatterlist		*out_sg;
+
+	/* Buffers for copying for unaligned cases */
+	struct scatterlist		in_sgl;
+	struct scatterlist		out_sgl;
+	struct scatterlist		*orig_out;
+	int				sgs_copied;
+
+	struct scatter_walk		in_walk;
+	struct scatter_walk		out_walk;
+	int			dma_in;
+	struct dma_chan		*dma_lch_in;
+	int			dma_out;
+	struct dma_chan		*dma_lch_out;
+	int			in_sg_len;
+	int			out_sg_len;
+	int			pio_only;
+	const struct omap_des_pdata	*pdata;
+};
+
+/* keep registered devices data here */
+static LIST_HEAD(dev_list);
+static DEFINE_SPINLOCK(list_lock);
+
+#ifdef DEBUG
+#define omap_des_read(dd, offset)                               \
+	({                                                              \
+	 int _read_ret;                                          \
+	 _read_ret = __raw_readl(dd->io_base + offset);          \
+	 pr_err("omap_des_read(" #offset "=%#x)= %#x\n",       \
+		 offset, _read_ret);                            \
+	 _read_ret;                                              \
+	 })
+#else
+static inline u32 omap_des_read(struct omap_des_dev *dd, u32 offset)
+{
+	return __raw_readl(dd->io_base + offset);
+}
+#endif
+
+#ifdef DEBUG
+#define omap_des_write(dd, offset, value)                               \
+	do {                                                            \
+		pr_err("omap_des_write(" #offset "=%#x) value=%#x\n", \
+				offset, value);                                \
+		__raw_writel(value, dd->io_base + offset);              \
+	} while (0)
+#else
+static inline void omap_des_write(struct omap_des_dev *dd, u32 offset,
+		u32 value)
+{
+	__raw_writel(value, dd->io_base + offset);
+}
+#endif
+
+static inline void omap_des_write_mask(struct omap_des_dev *dd, u32 offset,
+					u32 value, u32 mask)
+{
+	u32 val;
+
+	val = omap_des_read(dd, offset);
+	val &= ~mask;
+	val |= value;
+	omap_des_write(dd, offset, val);
+}
+
+static void omap_des_write_n(struct omap_des_dev *dd, u32 offset,
+					u32 *value, int count)
+{
+	for (; count--; value++, offset += 4)
+		omap_des_write(dd, offset, *value);
+}
+
+static int omap_des_hw_init(struct omap_des_dev *dd)
+{
+	/*
+	 * Clocks are enabled when a request starts and disabled when it
+	 * finishes. There may be long delays between requests, and the
+	 * device might go into off mode to save power.
+	 */
+	pm_runtime_get_sync(dd->dev);
+
+	if (!(dd->flags & FLAGS_INIT)) {
+		dd->flags |= FLAGS_INIT;
+		dd->err = 0;
+	}
+
+	return 0;
+}
+
+static int omap_des_write_ctrl(struct omap_des_dev *dd)
+{
+	unsigned int key32;
+	int i, err;
+	u32 val = 0, mask = 0;
+
+	err = omap_des_hw_init(dd);
+	if (err)
+		return err;
+
+	key32 = dd->ctx->keylen / sizeof(u32);
+
+	/* it seems a key should always be set even if it has not changed */
+	for (i = 0; i < key32; i++) {
+		omap_des_write(dd, DES_REG_KEY(dd, i),
+			       __le32_to_cpu(dd->ctx->key[i]));
+	}
+
+	if ((dd->flags & FLAGS_CBC) && dd->req->info)
+		omap_des_write_n(dd, DES_REG_IV(dd, 0), dd->req->info, 2);
+
+	if (dd->flags & FLAGS_CBC)
+		val |= DES_REG_CTRL_CBC;
+	if (dd->flags & FLAGS_ENCRYPT)
+		val |= DES_REG_CTRL_DIRECTION;
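+	/* Six 32-bit key words (a 24-byte key) selects triple-DES. */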
+	if (key32 == 6)
+		val |= DES_REG_CTRL_TDES;
+
+	mask |= DES_REG_CTRL_CBC | DES_REG_CTRL_DIRECTION | DES_REG_CTRL_TDES;
+
+	omap_des_write_mask(dd, DES_REG_CTRL(dd), val, mask);
+
+	return 0;
+}
+
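+/*
+ * Program the transfer length and set the DMA start/enable bits for
+ * whichever channels are in use.
+ */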
+static void omap_des_dma_trigger_omap4(struct omap_des_dev *dd, int length)
+{
+	u32 mask, val;
+
+	omap_des_write(dd, DES_REG_LENGTH_N(0), length);
+
+	val = dd->pdata->dma_start;
+
+	if (dd->dma_lch_out != NULL)
+		val |= dd->pdata->dma_enable_out;
+	if (dd->dma_lch_in != NULL)
+		val |= dd->pdata->dma_enable_in;
+
+	mask = dd->pdata->dma_enable_out | dd->pdata->dma_enable_in |
+	       dd->pdata->dma_start;
+
+	omap_des_write_mask(dd, DES_REG_MASK(dd), val, mask);
+}
+
+static void omap_des_dma_stop(struct omap_des_dev *dd)
+{
+	u32 mask;
+
+	mask = dd->pdata->dma_enable_out | dd->pdata->dma_enable_in |
+	       dd->pdata->dma_start;
+
+	omap_des_write_mask(dd, DES_REG_MASK(dd), 0, mask);
+}
+
+static struct omap_des_dev *omap_des_find_dev(struct omap_des_ctx *ctx)
+{
+	struct omap_des_dev *dd = NULL, *tmp;
+
+	spin_lock_bh(&list_lock);
+	if (!ctx->dd) {
+		list_for_each_entry(tmp, &dev_list, list) {
+			/* FIXME: take first available DES core */
+			dd = tmp;
+			break;
+		}
+		ctx->dd = dd;
+	} else {
+		/* already found before */
+		dd = ctx->dd;
+	}
+	spin_unlock_bh(&list_lock);
+
+	return dd;
+}
+
+static void omap_des_dma_out_callback(void *data)
+{
+	struct omap_des_dev *dd = data;
+
+	/* dma_lch_out - completed */
+	tasklet_schedule(&dd->done_task);
+}
+
+static int omap_des_dma_init(struct omap_des_dev *dd)
+{
+	int err = -ENOMEM;
+	dma_cap_mask_t mask;
+
+	dd->dma_lch_out = NULL;
+	dd->dma_lch_in = NULL;
+
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+
+	dd->dma_lch_in = dma_request_slave_channel_compat(mask,
+							  omap_dma_filter_fn,
+							  &dd->dma_in,
+							  dd->dev, "rx");
+	if (!dd->dma_lch_in) {
+		dev_err(dd->dev, "Unable to request in DMA channel\n");
+		goto err_dma_in;
+	}
+
+	dd->dma_lch_out = dma_request_slave_channel_compat(mask,
+							   omap_dma_filter_fn,
+							   &dd->dma_out,
+							   dd->dev, "tx");
+	if (!dd->dma_lch_out) {
+		dev_err(dd->dev, "Unable to request out DMA channel\n");
+		goto err_dma_out;
+	}
+
+	return 0;
+
+err_dma_out:
+	dma_release_channel(dd->dma_lch_in);
+err_dma_in:
+	if (err)
+		pr_err("error: %d\n", err);
+	return err;
+}
+
+static void omap_des_dma_cleanup(struct omap_des_dev *dd)
+{
+	dma_release_channel(dd->dma_lch_out);
+	dma_release_channel(dd->dma_lch_in);
+}
+
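+/*
+ * Copy nbytes between a linear buffer and an SG list, starting at the
+ * given offset; 'out' selects the copy direction.
+ */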
+static void sg_copy_buf(void *buf, struct scatterlist *sg,
+			      unsigned int start, unsigned int nbytes, int out)
+{
+	struct scatter_walk walk;
+
+	if (!nbytes)
+		return;
+
+	scatterwalk_start(&walk, sg);
+	scatterwalk_advance(&walk, start);
+	scatterwalk_copychunks(buf, &walk, nbytes, out);
+	scatterwalk_done(&walk, out, 0);
+}
+
+static int omap_des_crypt_dma(struct crypto_tfm *tfm,
+		struct scatterlist *in_sg, struct scatterlist *out_sg,
+		int in_sg_len, int out_sg_len)
+{
+	struct omap_des_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct omap_des_dev *dd = ctx->dd;
+	struct dma_async_tx_descriptor *tx_in, *tx_out;
+	struct dma_slave_config cfg;
+	int ret;
+
+	if (dd->pio_only) {
+		scatterwalk_start(&dd->in_walk, dd->in_sg);
+		scatterwalk_start(&dd->out_walk, dd->out_sg);
+
+		/*
+		 * Enable the DATA_IN interrupt and let it take
+		 * care of the rest.
+		 */
+		omap_des_write(dd, DES_REG_IRQ_ENABLE(dd), 0x2);
+		return 0;
+	}
+
+	dma_sync_sg_for_device(dd->dev, dd->in_sg, in_sg_len, DMA_TO_DEVICE);
+
+	memset(&cfg, 0, sizeof(cfg));
+
+	cfg.src_addr = dd->phys_base + DES_REG_DATA_N(dd, 0);
+	cfg.dst_addr = dd->phys_base + DES_REG_DATA_N(dd, 0);
+	cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	cfg.src_maxburst = DST_MAXBURST;
+	cfg.dst_maxburst = DST_MAXBURST;
+
+	/* IN */
+	ret = dmaengine_slave_config(dd->dma_lch_in, &cfg);
+	if (ret) {
+		dev_err(dd->dev, "can't configure IN dmaengine slave: %d\n",
+			ret);
+		return ret;
+	}
+
+	tx_in = dmaengine_prep_slave_sg(dd->dma_lch_in, in_sg, in_sg_len,
+					DMA_MEM_TO_DEV,
+					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+	if (!tx_in) {
+		dev_err(dd->dev, "IN prep_slave_sg() failed\n");
+		return -EINVAL;
+	}
+
+	/* No callback necessary */
+	tx_in->callback_param = dd;
+
+	/* OUT */
+	ret = dmaengine_slave_config(dd->dma_lch_out, &cfg);
+	if (ret) {
+		dev_err(dd->dev, "can't configure OUT dmaengine slave: %d\n",
+			ret);
+		return ret;
+	}
+
+	tx_out = dmaengine_prep_slave_sg(dd->dma_lch_out, out_sg, out_sg_len,
+					DMA_DEV_TO_MEM,
+					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+	if (!tx_out) {
+		dev_err(dd->dev, "OUT prep_slave_sg() failed\n");
+		return -EINVAL;
+	}
+
+	tx_out->callback = omap_des_dma_out_callback;
+	tx_out->callback_param = dd;
+
+	dmaengine_submit(tx_in);
+	dmaengine_submit(tx_out);
+
+	dma_async_issue_pending(dd->dma_lch_in);
+	dma_async_issue_pending(dd->dma_lch_out);
+
+	/* start DMA */
+	dd->pdata->trigger(dd, dd->total);
+
+	return 0;
+}
+
+static int omap_des_crypt_dma_start(struct omap_des_dev *dd)
+{
+	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(
+					crypto_ablkcipher_reqtfm(dd->req));
+	int err;
+
+	pr_debug("total: %d\n", dd->total);
+
+	if (!dd->pio_only) {
+		err = dma_map_sg(dd->dev, dd->in_sg, dd->in_sg_len,
+				 DMA_TO_DEVICE);
+		if (!err) {
+			dev_err(dd->dev, "dma_map_sg() error\n");
+			return -EINVAL;
+		}
+
+		err = dma_map_sg(dd->dev, dd->out_sg, dd->out_sg_len,
+				 DMA_FROM_DEVICE);
+		if (!err) {
+			dev_err(dd->dev, "dma_map_sg() error\n");
+			return -EINVAL;
+		}
+	}
+
+	err = omap_des_crypt_dma(tfm, dd->in_sg, dd->out_sg, dd->in_sg_len,
+				 dd->out_sg_len);
+	if (err && !dd->pio_only) {
+		dma_unmap_sg(dd->dev, dd->in_sg, dd->in_sg_len, DMA_TO_DEVICE);
+		dma_unmap_sg(dd->dev, dd->out_sg, dd->out_sg_len,
+			     DMA_FROM_DEVICE);
+	}
+
+	return err;
+}
+
+static void omap_des_finish_req(struct omap_des_dev *dd, int err)
+{
+	struct ablkcipher_request *req = dd->req;
+
+	pr_debug("err: %d\n", err);
+
+	pm_runtime_put(dd->dev);
+	dd->flags &= ~FLAGS_BUSY;
+
+	req->base.complete(&req->base, err);
+}
+
+static int omap_des_crypt_dma_stop(struct omap_des_dev *dd)
+{
+	int err = 0;
+
+	pr_debug("total: %d\n", dd->total);
+
+	omap_des_dma_stop(dd);
+
+	dmaengine_terminate_all(dd->dma_lch_in);
+	dmaengine_terminate_all(dd->dma_lch_out);
+
+	/* The SG lists are unmapped by the caller, omap_des_done_task(). */
+
+	return err;
+}
+
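+/*
+ * DMA requires each SG entry to start 32-bit aligned and to hold a
+ * whole number of DES blocks; anything else is bounced through a copy.
+ */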
+static int omap_des_copy_needed(struct scatterlist *sg)
+{
+	while (sg) {
+		if (!IS_ALIGNED(sg->offset, 4))
+			return -1;
+		if (!IS_ALIGNED(sg->length, DES_BLOCK_SIZE))
+			return -1;
+		sg = sg_next(sg);
+	}
+	return 0;
+}
+
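+/*
+ * Stage an unaligned request through linear bounce buffers, presented
+ * to the engine as single-entry SG lists.
+ */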
+static int omap_des_copy_sgs(struct omap_des_dev *dd)
+{
+	void *buf_in, *buf_out;
+	int pages;
+
+	/* __get_free_pages() takes an allocation order, not a page count. */
+	pages = get_order(dd->total);
+
+	buf_in = (void *)__get_free_pages(GFP_ATOMIC, pages);
+	buf_out = (void *)__get_free_pages(GFP_ATOMIC, pages);
+
+	if (!buf_in || !buf_out) {
+		pr_err("Couldn't allocated pages for unaligned cases.\n");
+		return -1;
+	}
+
+	dd->orig_out = dd->out_sg;
+
+	sg_copy_buf(buf_in, dd->in_sg, 0, dd->total, 0);
+
+	sg_init_table(&dd->in_sgl, 1);
+	sg_set_buf(&dd->in_sgl, buf_in, dd->total);
+	dd->in_sg = &dd->in_sgl;
+
+	sg_init_table(&dd->out_sgl, 1);
+	sg_set_buf(&dd->out_sgl, buf_out, dd->total);
+	dd->out_sg = &dd->out_sgl;
+
+	return 0;
+}
+
+static int omap_des_handle_queue(struct omap_des_dev *dd,
+			       struct ablkcipher_request *req)
+{
+	struct crypto_async_request *async_req, *backlog;
+	struct omap_des_ctx *ctx;
+	struct omap_des_reqctx *rctx;
+	unsigned long flags;
+	int err, ret = 0;
+
+	spin_lock_irqsave(&dd->lock, flags);
+	if (req)
+		ret = ablkcipher_enqueue_request(&dd->queue, req);
+	if (dd->flags & FLAGS_BUSY) {
+		spin_unlock_irqrestore(&dd->lock, flags);
+		return ret;
+	}
+	backlog = crypto_get_backlog(&dd->queue);
+	async_req = crypto_dequeue_request(&dd->queue);
+	if (async_req)
+		dd->flags |= FLAGS_BUSY;
+	spin_unlock_irqrestore(&dd->lock, flags);
+
+	if (!async_req)
+		return ret;
+
+	if (backlog)
+		backlog->complete(backlog, -EINPROGRESS);
+
+	req = ablkcipher_request_cast(async_req);
+
+	/* assign new request to device */
+	dd->req = req;
+	dd->total = req->nbytes;
+	dd->total_save = req->nbytes;
+	dd->in_sg = req->src;
+	dd->out_sg = req->dst;
+
+	if (omap_des_copy_needed(dd->in_sg) ||
+	    omap_des_copy_needed(dd->out_sg)) {
+		if (omap_des_copy_sgs(dd))
+			pr_err("Failed to copy SGs for unaligned cases\n");
+		dd->sgs_copied = 1;
+	} else {
+		dd->sgs_copied = 0;
+	}
+
+	dd->in_sg_len = scatterwalk_bytes_sglen(dd->in_sg, dd->total);
+	dd->out_sg_len = scatterwalk_bytes_sglen(dd->out_sg, dd->total);
+	BUG_ON(dd->in_sg_len < 0 || dd->out_sg_len < 0);
+
+	rctx = ablkcipher_request_ctx(req);
+	ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req));
+	rctx->mode &= FLAGS_MODE_MASK;
+	dd->flags = (dd->flags & ~FLAGS_MODE_MASK) | rctx->mode;
+
+	dd->ctx = ctx;
+	ctx->dd = dd;
+
+	err = omap_des_write_ctrl(dd);
+	if (!err)
+		err = omap_des_crypt_dma_start(dd);
+	if (err) {
+		/* done_task will not finish it, so do it here */
+		omap_des_finish_req(dd, err);
+		tasklet_schedule(&dd->queue_task);
+	}
+
+	return ret; /* return ret, which is enqueue return value */
+}
+
+static void omap_des_done_task(unsigned long data)
+{
+	struct omap_des_dev *dd = (struct omap_des_dev *)data;
+	void *buf_in, *buf_out;
+	int pages;
+
+	pr_debug("enter done_task\n");
+
+	if (!dd->pio_only) {
+		/* The CPU is about to read what the engine just wrote. */
+		dma_sync_sg_for_cpu(dd->dev, dd->out_sg, dd->out_sg_len,
+				    DMA_FROM_DEVICE);
+		dma_unmap_sg(dd->dev, dd->in_sg, dd->in_sg_len, DMA_TO_DEVICE);
+		dma_unmap_sg(dd->dev, dd->out_sg, dd->out_sg_len,
+			     DMA_FROM_DEVICE);
+		omap_des_crypt_dma_stop(dd);
+	}
+
+	if (dd->sgs_copied) {
+		buf_in = sg_virt(&dd->in_sgl);
+		buf_out = sg_virt(&dd->out_sgl);
+
+		sg_copy_buf(buf_out, dd->orig_out, 0, dd->total_save, 1);
+
+		pages = get_order(dd->total_save);
+		free_pages((unsigned long)buf_in, pages);
+		free_pages((unsigned long)buf_out, pages);
+	}
+
+	omap_des_finish_req(dd, 0);
+	omap_des_handle_queue(dd, NULL);
+
+	pr_debug("exit\n");
+}
+
+static void omap_des_queue_task(unsigned long data)
+{
+	struct omap_des_dev *dd = (struct omap_des_dev *)data;
+
+	omap_des_handle_queue(dd, NULL);
+}
+
+static int omap_des_crypt(struct ablkcipher_request *req, unsigned long mode)
+{
+	struct omap_des_ctx *ctx = crypto_ablkcipher_ctx(
+			crypto_ablkcipher_reqtfm(req));
+	struct omap_des_reqctx *rctx = ablkcipher_request_ctx(req);
+	struct omap_des_dev *dd;
+
+	pr_debug("nbytes: %d, enc: %d, cbc: %d\n", req->nbytes,
+		 !!(mode & FLAGS_ENCRYPT),
+		 !!(mode & FLAGS_CBC));
+
+	if (!IS_ALIGNED(req->nbytes, DES_BLOCK_SIZE)) {
+		pr_err("request size is not exact amount of DES blocks\n");
+		return -EINVAL;
+	}
+
+	dd = omap_des_find_dev(ctx);
+	if (!dd)
+		return -ENODEV;
+
+	rctx->mode = mode;
+
+	return omap_des_handle_queue(dd, req);
+}
+
+/* ********************** ALG API ************************************ */
+
+static int omap_des_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+			   unsigned int keylen)
+{
+	struct omap_des_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+
+	if (keylen != DES_KEY_SIZE && keylen != (3*DES_KEY_SIZE))
+		return -EINVAL;
+
+	pr_debug("enter, keylen: %d\n", keylen);
+
+	memcpy(ctx->key, key, keylen);
+	ctx->keylen = keylen;
+
+	return 0;
+}
+
+static int omap_des_ecb_encrypt(struct ablkcipher_request *req)
+{
+	return omap_des_crypt(req, FLAGS_ENCRYPT);
+}
+
+static int omap_des_ecb_decrypt(struct ablkcipher_request *req)
+{
+	return omap_des_crypt(req, 0);
+}
+
+static int omap_des_cbc_encrypt(struct ablkcipher_request *req)
+{
+	return omap_des_crypt(req, FLAGS_ENCRYPT | FLAGS_CBC);
+}
+
+static int omap_des_cbc_decrypt(struct ablkcipher_request *req)
+{
+	return omap_des_crypt(req, FLAGS_CBC);
+}
+
+static int omap_des_cra_init(struct crypto_tfm *tfm)
+{
+	pr_debug("enter\n");
+
+	tfm->crt_ablkcipher.reqsize = sizeof(struct omap_des_reqctx);
+
+	return 0;
+}
+
+static void omap_des_cra_exit(struct crypto_tfm *tfm)
+{
+	pr_debug("enter\n");
+}
+
+/* ********************** ALGS ************************************ */
+
+static struct crypto_alg algs_ecb_cbc[] = {
+{
+	.cra_name		= "ecb(des)",
+	.cra_driver_name	= "ecb-des-omap",
+	.cra_priority		= 100,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
+				  CRYPTO_ALG_KERN_DRIVER_ONLY |
+				  CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= DES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct omap_des_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= omap_des_cra_init,
+	.cra_exit		= omap_des_cra_exit,
+	.cra_u.ablkcipher = {
+		.min_keysize	= DES_KEY_SIZE,
+		.max_keysize	= DES_KEY_SIZE,
+		.setkey		= omap_des_setkey,
+		.encrypt	= omap_des_ecb_encrypt,
+		.decrypt	= omap_des_ecb_decrypt,
+	}
+},
+{
+	.cra_name		= "cbc(des)",
+	.cra_driver_name	= "cbc-des-omap",
+	.cra_priority		= 100,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
+				  CRYPTO_ALG_KERN_DRIVER_ONLY |
+				  CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= DES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct omap_des_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= omap_des_cra_init,
+	.cra_exit		= omap_des_cra_exit,
+	.cra_u.ablkcipher = {
+		.min_keysize	= DES_KEY_SIZE,
+		.max_keysize	= DES_KEY_SIZE,
+		.ivsize		= DES_BLOCK_SIZE,
+		.setkey		= omap_des_setkey,
+		.encrypt	= omap_des_cbc_encrypt,
+		.decrypt	= omap_des_cbc_decrypt,
+	}
+},
+{
+	.cra_name		= "ecb(des3_ede)",
+	.cra_driver_name	= "ecb-des3-omap",
+	.cra_priority		= 100,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
+				  CRYPTO_ALG_KERN_DRIVER_ONLY |
+				  CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= DES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct omap_des_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= omap_des_cra_init,
+	.cra_exit		= omap_des_cra_exit,
+	.cra_u.ablkcipher = {
+		.min_keysize	= 3*DES_KEY_SIZE,
+		.max_keysize	= 3*DES_KEY_SIZE,
+		.setkey		= omap_des_setkey,
+		.encrypt	= omap_des_ecb_encrypt,
+		.decrypt	= omap_des_ecb_decrypt,
+	}
+},
+{
+	.cra_name		= "cbc(des3_ede)",
+	.cra_driver_name	= "cbc-des3-omap",
+	.cra_priority		= 100,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
+				  CRYPTO_ALG_KERN_DRIVER_ONLY |
+				  CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= DES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct omap_des_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= omap_des_cra_init,
+	.cra_exit		= omap_des_cra_exit,
+	.cra_u.ablkcipher = {
+		.min_keysize	= 3*DES_KEY_SIZE,
+		.max_keysize	= 3*DES_KEY_SIZE,
+		.ivsize		= DES_BLOCK_SIZE,
+		.setkey		= omap_des_setkey,
+		.encrypt	= omap_des_cbc_encrypt,
+		.decrypt	= omap_des_cbc_decrypt,
+	}
+}
+};
+
+static struct omap_des_algs_info omap_des_algs_info_ecb_cbc[] = {
+	{
+		.algs_list	= algs_ecb_cbc,
+		.size		= ARRAY_SIZE(algs_ecb_cbc),
+	},
+};
+
+#ifdef CONFIG_OF
+static const struct omap_des_pdata omap_des_pdata_omap4 = {
+	.algs_info	= omap_des_algs_info_ecb_cbc,
+	.algs_info_size	= ARRAY_SIZE(omap_des_algs_info_ecb_cbc),
+	.trigger	= omap_des_dma_trigger_omap4,
+	.key_ofs	= 0x14,
+	.iv_ofs		= 0x18,
+	.ctrl_ofs	= 0x20,
+	.data_ofs	= 0x28,
+	.rev_ofs	= 0x30,
+	.mask_ofs	= 0x34,
+	.irq_status_ofs = 0x3c,
+	.irq_enable_ofs = 0x40,
+	.dma_enable_in	= BIT(5),
+	.dma_enable_out	= BIT(6),
+	.major_mask	= 0x0700,
+	.major_shift	= 8,
+	.minor_mask	= 0x003f,
+	.minor_shift	= 0,
+};
+
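+/*
+ * PIO interrupt handler: DATA_IN feeds one block into the engine,
+ * DATA_OUT drains the result, ping-ponging until dd->total is consumed.
+ */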
+static irqreturn_t omap_des_irq(int irq, void *dev_id)
+{
+	struct omap_des_dev *dd = dev_id;
+	u32 status, i;
+	u32 *src, *dst;
+
+	status = omap_des_read(dd, DES_REG_IRQ_STATUS(dd));
+	if (status & DES_REG_IRQ_DATA_IN) {
+		omap_des_write(dd, DES_REG_IRQ_ENABLE(dd), 0x0);
+
+		BUG_ON(!dd->in_sg);
+
+		BUG_ON(_calc_walked(in) > dd->in_sg->length);
+
+		src = sg_virt(dd->in_sg) + _calc_walked(in);
+
+		for (i = 0; i < DES_BLOCK_WORDS; i++) {
+			omap_des_write(dd, DES_REG_DATA_N(dd, i), *src);
+
+			scatterwalk_advance(&dd->in_walk, 4);
+			if (dd->in_sg->length == _calc_walked(in)) {
+				dd->in_sg = scatterwalk_sg_next(dd->in_sg);
+				if (dd->in_sg) {
+					scatterwalk_start(&dd->in_walk,
+							  dd->in_sg);
+					src = sg_virt(dd->in_sg) +
+					      _calc_walked(in);
+				}
+			} else {
+				src++;
+			}
+		}
+
+		/* Clear IRQ status */
+		status &= ~DES_REG_IRQ_DATA_IN;
+		omap_des_write(dd, DES_REG_IRQ_STATUS(dd), status);
+
+		/* Enable DATA_OUT interrupt */
+		omap_des_write(dd, DES_REG_IRQ_ENABLE(dd), 0x4);
+
+	} else if (status & DES_REG_IRQ_DATA_OUT) {
+		omap_des_write(dd, DES_REG_IRQ_ENABLE(dd), 0x0);
+
+		BUG_ON(!dd->out_sg);
+
+		BUG_ON(_calc_walked(out) > dd->out_sg->length);
+
+		dst = sg_virt(dd->out_sg) + _calc_walked(out);
+
+		for (i = 0; i < DES_BLOCK_WORDS; i++) {
+			*dst = omap_des_read(dd, DES_REG_DATA_N(dd, i));
+			scatterwalk_advance(&dd->out_walk, 4);
+			if (dd->out_sg->length == _calc_walked(out)) {
+				dd->out_sg = scatterwalk_sg_next(dd->out_sg);
+				if (dd->out_sg) {
+					scatterwalk_start(&dd->out_walk,
+							  dd->out_sg);
+					dst = sg_virt(dd->out_sg) +
+					      _calc_walked(out);
+				}
+			} else {
+				dst++;
+			}
+		}
+
+		dd->total -= DES_BLOCK_SIZE;
+
+		BUG_ON(dd->total < 0);
+
+		/* Clear IRQ status */
+		status &= ~DES_REG_IRQ_DATA_OUT;
+		omap_des_write(dd, DES_REG_IRQ_STATUS(dd), status);
+
+		if (!dd->total)
+			/* All bytes read! */
+			tasklet_schedule(&dd->done_task);
+		else
+			/* Enable DATA_IN interrupt for next block */
+			omap_des_write(dd, DES_REG_IRQ_ENABLE(dd), 0x2);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static const struct of_device_id omap_des_of_match[] = {
+	{
+		.compatible	= "ti,omap4-des",
+		.data		= &omap_des_pdata_omap4,
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(of, omap_des_of_match);
+
+static int omap_des_get_of(struct omap_des_dev *dd,
+		struct platform_device *pdev)
+{
+	const struct of_device_id *match;
+
+	match = of_match_device(of_match_ptr(omap_des_of_match), &pdev->dev);
+	if (!match) {
+		dev_err(&pdev->dev, "no compatible OF match\n");
+		return -EINVAL;
+	}
+
+	dd->dma_out = -1; /* Dummy value that's unused */
+	dd->dma_in = -1; /* Dummy value that's unused */
+	dd->pdata = match->data;
+
+	return 0;
+}
+#else
+static int omap_des_get_of(struct omap_des_dev *dd,
+		struct platform_device *pdev)
+{
+	return -EINVAL;
+}
+#endif
+
+static int omap_des_get_pdev(struct omap_des_dev *dd,
+		struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct resource *r;
+	int err = 0;
+
+	/* Get the DMA out channel */
+	r = platform_get_resource(pdev, IORESOURCE_DMA, 0);
+	if (!r) {
+		dev_err(dev, "no DMA out resource info\n");
+		err = -ENODEV;
+		goto err;
+	}
+	dd->dma_out = r->start;
+
+	/* Get the DMA in channel */
+	r = platform_get_resource(pdev, IORESOURCE_DMA, 1);
+	if (!r) {
+		dev_err(dev, "no DMA in resource info\n");
+		err = -ENODEV;
+		goto err;
+	}
+	dd->dma_in = r->start;
+
+	/* non-DT devices get pdata from pdev */
+	dd->pdata = pdev->dev.platform_data;
+
+err:
+	return err;
+}
+
+static int omap_des_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct omap_des_dev *dd;
+	struct crypto_alg *algp;
+	struct resource *res;
+	int err = -ENOMEM, i, j, irq = -1;
+	u32 reg;
+
+	dd = devm_kzalloc(dev, sizeof(struct omap_des_dev), GFP_KERNEL);
+	if (dd == NULL) {
+		dev_err(dev, "unable to alloc data struct.\n");
+		goto err_data;
+	}
+	dd->dev = dev;
+	platform_set_drvdata(pdev, dd);
+
+	spin_lock_init(&dd->lock);
+	crypto_init_queue(&dd->queue, OMAP_DES_QUEUE_LENGTH);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(dev, "no MEM resource info\n");
+		goto err_res;
+	}
+
+	err = (dev->of_node) ? omap_des_get_of(dd, pdev) :
+			       omap_des_get_pdev(dd, pdev);
+	if (err)
+		goto err_res;
+
+	dd->io_base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(dd->io_base)) {
+		err = PTR_ERR(dd->io_base);
+		goto err_res;
+	}
+	dd->phys_base = res->start;
+
+	pm_runtime_enable(dev);
+	pm_runtime_get_sync(dev);
+
+	omap_des_dma_stop(dd);
+
+	reg = omap_des_read(dd, DES_REG_REV(dd));
+
+	pm_runtime_put_sync(dev);
+
+	dev_info(dev, "OMAP DES hw accel rev: %u.%u\n",
+		 (reg & dd->pdata->major_mask) >> dd->pdata->major_shift,
+		 (reg & dd->pdata->minor_mask) >> dd->pdata->minor_shift);
+
+	tasklet_init(&dd->done_task, omap_des_done_task, (unsigned long)dd);
+	tasklet_init(&dd->queue_task, omap_des_queue_task, (unsigned long)dd);
+
+	err = omap_des_dma_init(dd);
+	if (err && DES_REG_IRQ_STATUS(dd) && DES_REG_IRQ_ENABLE(dd)) {
+		dd->pio_only = 1;
+
+		irq = platform_get_irq(pdev, 0);
+		if (irq < 0) {
+			dev_err(dev, "can't get IRQ resource\n");
+			goto err_irq;
+		}
+
+		err = devm_request_irq(dev, irq, omap_des_irq, 0,
+				dev_name(dev), dd);
+		if (err) {
+			dev_err(dev, "Unable to grab omap-des IRQ\n");
+			goto err_irq;
+		}
+	}
+
+	INIT_LIST_HEAD(&dd->list);
+	spin_lock(&list_lock);
+	list_add_tail(&dd->list, &dev_list);
+	spin_unlock(&list_lock);
+
+	for (i = 0; i < dd->pdata->algs_info_size; i++) {
+		for (j = 0; j < dd->pdata->algs_info[i].size; j++) {
+			algp = &dd->pdata->algs_info[i].algs_list[j];
+
+			pr_debug("reg alg: %s\n", algp->cra_name);
+			INIT_LIST_HEAD(&algp->cra_list);
+
+			err = crypto_register_alg(algp);
+			if (err)
+				goto err_algs;
+
+			dd->pdata->algs_info[i].registered++;
+		}
+	}
+
+	return 0;
+err_algs:
+	for (i = dd->pdata->algs_info_size - 1; i >= 0; i--)
+		for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--)
+			crypto_unregister_alg(
+					&dd->pdata->algs_info[i].algs_list[j]);
+	if (!dd->pio_only)
+		omap_des_dma_cleanup(dd);
+err_irq:
+	tasklet_kill(&dd->done_task);
+	tasklet_kill(&dd->queue_task);
+	pm_runtime_disable(dev);
+err_res:
+	dd = NULL;
+err_data:
+	dev_err(dev, "initialization failed.\n");
+	return err;
+}
+
+static int omap_des_remove(struct platform_device *pdev)
+{
+	struct omap_des_dev *dd = platform_get_drvdata(pdev);
+	int i, j;
+
+	if (!dd)
+		return -ENODEV;
+
+	spin_lock(&list_lock);
+	list_del(&dd->list);
+	spin_unlock(&list_lock);
+
+	for (i = dd->pdata->algs_info_size - 1; i >= 0; i--)
+		for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--)
+			crypto_unregister_alg(
+					&dd->pdata->algs_info[i].algs_list[j]);
+
+	tasklet_kill(&dd->done_task);
+	tasklet_kill(&dd->queue_task);
+	omap_des_dma_cleanup(dd);
+	pm_runtime_disable(dd->dev);
+	dd = NULL;
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int omap_des_suspend(struct device *dev)
+{
+	pm_runtime_put_sync(dev);
+	return 0;
+}
+
+static int omap_des_resume(struct device *dev)
+{
+	pm_runtime_get_sync(dev);
+	return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(omap_des_pm_ops, omap_des_suspend, omap_des_resume);
+
+static struct platform_driver omap_des_driver = {
+	.probe	= omap_des_probe,
+	.remove	= omap_des_remove,
+	.driver	= {
+		.name	= "omap-des",
+		.owner	= THIS_MODULE,
+		.pm	= &omap_des_pm_ops,
+		.of_match_table	= of_match_ptr(omap_des_of_match),
+	},
+};
+
+module_platform_driver(omap_des_driver);
+
+MODULE_DESCRIPTION("OMAP DES hw acceleration support.");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Joel Fernandes <joelf@ti.com>");
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index a727a6a..710d863 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -636,11 +636,17 @@
 static size_t omap_sham_append_sg(struct omap_sham_reqctx *ctx)
 {
 	size_t count;
+	const u8 *vaddr;
 
 	while (ctx->sg) {
+		vaddr = kmap_atomic(sg_page(ctx->sg));
+
 		count = omap_sham_append_buffer(ctx,
-				sg_virt(ctx->sg) + ctx->offset,
+				vaddr + ctx->offset,
 				ctx->sg->length - ctx->offset);
+
+		kunmap_atomic((void *)vaddr);
+
 		if (!count)
 			break;
 		ctx->offset += count;
@@ -2022,9 +2028,7 @@
 }
 #endif
 
-static const struct dev_pm_ops omap_sham_pm_ops = {
-	SET_SYSTEM_SLEEP_PM_OPS(omap_sham_suspend, omap_sham_resume)
-};
+static SIMPLE_DEV_PM_OPS(omap_sham_pm_ops, omap_sham_suspend, omap_sham_resume);
 
 static struct platform_driver omap_sham_driver = {
 	.probe	= omap_sham_probe,
diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c
index a6175ba..5da5b98 100644
--- a/drivers/crypto/picoxcell_crypto.c
+++ b/drivers/crypto/picoxcell_crypto.c
@@ -1720,22 +1720,16 @@
 	engine->name = dev_name(&pdev->dev);
 
 	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
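+	/* devm_ioremap_resource() checks, requests and maps the region. */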
+	engine->regs = devm_ioremap_resource(&pdev->dev, mem);
+	if (IS_ERR(engine->regs))
+		return PTR_ERR(engine->regs);
+
 	irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
-	if (!mem || !irq) {
+	if (!irq) {
 		dev_err(&pdev->dev, "no memory/irq resource for engine\n");
 		return -ENXIO;
 	}
 
-	if (!devm_request_mem_region(&pdev->dev, mem->start, resource_size(mem),
-				     engine->name))
-		return -ENOMEM;
-
-	engine->regs = devm_ioremap(&pdev->dev, mem->start, resource_size(mem));
-	if (!engine->regs) {
-		dev_err(&pdev->dev, "memory map failed\n");
-		return -ENOMEM;
-	}
-
 	if (devm_request_irq(&pdev->dev, irq->start, spacc_spacc_irq, 0,
 			     engine->name, engine)) {
 		dev_err(engine->dev, "failed to request IRQ\n");
diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c
index cf149b1..be45762 100644
--- a/drivers/crypto/s5p-sss.c
+++ b/drivers/crypto/s5p-sss.c
@@ -568,17 +568,14 @@
 	if (s5p_dev)
 		return -EEXIST;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res)
-		return -ENODEV;
-
 	pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL);
 	if (!pdata)
 		return -ENOMEM;
 
-	if (!devm_request_mem_region(dev, res->start,
-				     resource_size(res), pdev->name))
-		return -EBUSY;
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	pdata->ioaddr = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(pdata->ioaddr))
+		return PTR_ERR(pdata->ioaddr);
 
 	pdata->clk = devm_clk_get(dev, "secss");
 	if (IS_ERR(pdata->clk)) {
@@ -589,8 +586,6 @@
 	clk_enable(pdata->clk);
 
 	spin_lock_init(&pdata->lock);
-	pdata->ioaddr = devm_ioremap(dev, res->start,
-				     resource_size(res));
 
 	pdata->irq_hash = platform_get_irq_byname(pdev, "hash");
 	if (pdata->irq_hash < 0) {
diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c
index 785a9de..07a5987 100644
--- a/drivers/crypto/sahara.c
+++ b/drivers/crypto/sahara.c
@@ -885,22 +885,9 @@
 
 	/* Get the base address */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		dev_err(&pdev->dev, "failed to get memory region resource\n");
-		return -ENODEV;
-	}
-
-	if (devm_request_mem_region(&pdev->dev, res->start,
-			resource_size(res), SAHARA_NAME) == NULL) {
-		dev_err(&pdev->dev, "failed to request memory region\n");
-		return -ENOENT;
-	}
-	dev->regs_base = devm_ioremap(&pdev->dev, res->start,
-				      resource_size(res));
-	if (!dev->regs_base) {
-		dev_err(&pdev->dev, "failed to ioremap address region\n");
-		return -ENOENT;
-	}
+	dev->regs_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(dev->regs_base))
+		return PTR_ERR(dev->regs_base);
 
 	/* Get the IRQ */
 	irq = platform_get_irq(pdev,  0);
@@ -909,10 +896,11 @@
 		return irq;
 	}
 
-	if (devm_request_irq(&pdev->dev, irq, sahara_irq_handler,
-		0, SAHARA_NAME, dev) < 0) {
+	err = devm_request_irq(&pdev->dev, irq, sahara_irq_handler,
+			       0, dev_name(&pdev->dev), dev);
+	if (err) {
 		dev_err(&pdev->dev, "failed to request irq\n");
-		return -ENOENT;
+		return err;
 	}
 
 	/* clocks */
diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 5967667..624b8be 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -2637,6 +2637,8 @@
 	if (!priv)
 		return -ENOMEM;
 
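+	/* Initialize the list early so error paths can safely walk it. */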
+	INIT_LIST_HEAD(&priv->alg_list);
+
 	dev_set_drvdata(dev, priv);
 
 	priv->ofdev = ofdev;
@@ -2657,8 +2659,6 @@
 			     (unsigned long)dev);
 	}
 
-	INIT_LIST_HEAD(&priv->alg_list);
-
 	priv->reg = of_iomap(np, 0);
 	if (!priv->reg) {
 		dev_err(dev, "failed to of_iomap\n");
diff --git a/drivers/crypto/tegra-aes.c b/drivers/crypto/tegra-aes.c
deleted file mode 100644
index 060eecc..0000000
--- a/drivers/crypto/tegra-aes.c
+++ /dev/null
@@ -1,1087 +0,0 @@
-/*
- * drivers/crypto/tegra-aes.c
- *
- * Driver for NVIDIA Tegra AES hardware engine residing inside the
- * Bit Stream Engine for Video (BSEV) hardware block.
- *
- * The programming sequence for this engine is with the help
- * of commands which travel via a command queue residing between the
- * CPU and the BSEV block. The BSEV engine has an internal RAM (VRAM)
- * where the final input plaintext, keys and the IV have to be copied
- * before starting the encrypt/decrypt operation.
- *
- * Copyright (c) 2010, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/clk.h>
-#include <linux/platform_device.h>
-#include <linux/scatterlist.h>
-#include <linux/dma-mapping.h>
-#include <linux/io.h>
-#include <linux/mutex.h>
-#include <linux/interrupt.h>
-#include <linux/completion.h>
-#include <linux/workqueue.h>
-
-#include <crypto/scatterwalk.h>
-#include <crypto/aes.h>
-#include <crypto/internal/rng.h>
-
-#include "tegra-aes.h"
-
-#define FLAGS_MODE_MASK			0x00FF
-#define FLAGS_ENCRYPT			BIT(0)
-#define FLAGS_CBC			BIT(1)
-#define FLAGS_GIV			BIT(2)
-#define FLAGS_RNG			BIT(3)
-#define FLAGS_OFB			BIT(4)
-#define FLAGS_NEW_KEY			BIT(5)
-#define FLAGS_NEW_IV			BIT(6)
-#define FLAGS_INIT			BIT(7)
-#define FLAGS_FAST			BIT(8)
-#define FLAGS_BUSY			9
-
-/*
- * Defines AES engine Max process bytes size in one go, which takes 1 msec.
- * AES engine spends about 176 cycles/16-bytes or 11 cycles/byte
- * The duration CPU can use the BSE to 1 msec, then the number of available
- * cycles of AVP/BSE is 216K. In this duration, AES can process 216/11 ~= 19KB
- * Based on this AES_HW_DMA_BUFFER_SIZE_BYTES is configured to 16KB.
- */
-#define AES_HW_DMA_BUFFER_SIZE_BYTES 0x4000
-
-/*
- * The key table length is 64 bytes
- * (This includes first upto 32 bytes key + 16 bytes original initial vector
- * and 16 bytes updated initial vector)
- */
-#define AES_HW_KEY_TABLE_LENGTH_BYTES 64
-
-/*
- * The memory being used is divides as follows:
- * 1. Key - 32 bytes
- * 2. Original IV - 16 bytes
- * 3. Updated IV - 16 bytes
- * 4. Key schedule - 256 bytes
- *
- * 1+2+3 constitute the hw key table.
- */
-#define AES_HW_IV_SIZE 16
-#define AES_HW_KEYSCHEDULE_LEN 256
-#define AES_IVKEY_SIZE (AES_HW_KEY_TABLE_LENGTH_BYTES + AES_HW_KEYSCHEDULE_LEN)
-
-/* Define commands required for AES operation */
-enum {
-	CMD_BLKSTARTENGINE = 0x0E,
-	CMD_DMASETUP = 0x10,
-	CMD_DMACOMPLETE = 0x11,
-	CMD_SETTABLE = 0x15,
-	CMD_MEMDMAVD = 0x22,
-};
-
-/* Define sub-commands */
-enum {
-	SUBCMD_VRAM_SEL = 0x1,
-	SUBCMD_CRYPTO_TABLE_SEL = 0x3,
-	SUBCMD_KEY_TABLE_SEL = 0x8,
-};
-
-/* memdma_vd command */
-#define MEMDMA_DIR_DTOVRAM		0 /* sdram -> vram */
-#define MEMDMA_DIR_VTODRAM		1 /* vram -> sdram */
-#define MEMDMA_DIR_SHIFT		25
-#define MEMDMA_NUM_WORDS_SHIFT		12
-
-/* command queue bit shifts */
-enum {
-	CMDQ_KEYTABLEADDR_SHIFT = 0,
-	CMDQ_KEYTABLEID_SHIFT = 17,
-	CMDQ_VRAMSEL_SHIFT = 23,
-	CMDQ_TABLESEL_SHIFT = 24,
-	CMDQ_OPCODE_SHIFT = 26,
-};
-
-/*
- * The secure key slot contains a unique secure key generated
- * and loaded by the bootloader. This slot is marked as non-accessible
- * to the kernel.
- */
-#define SSK_SLOT_NUM		4
-
-#define AES_NR_KEYSLOTS		8
-#define TEGRA_AES_QUEUE_LENGTH	50
-#define DEFAULT_RNG_BLK_SZ	16
-
-/* The command queue depth */
-#define AES_HW_MAX_ICQ_LENGTH	5
-
-struct tegra_aes_slot {
-	struct list_head node;
-	int slot_num;
-};
-
-static struct tegra_aes_slot ssk = {
-	.slot_num = SSK_SLOT_NUM,
-};
-
-struct tegra_aes_reqctx {
-	unsigned long mode;
-};
-
-struct tegra_aes_dev {
-	struct device *dev;
-	void __iomem *io_base;
-	dma_addr_t ivkey_phys_base;
-	void __iomem *ivkey_base;
-	struct clk *aes_clk;
-	struct tegra_aes_ctx *ctx;
-	int irq;
-	unsigned long flags;
-	struct completion op_complete;
-	u32 *buf_in;
-	dma_addr_t dma_buf_in;
-	u32 *buf_out;
-	dma_addr_t dma_buf_out;
-	u8 *iv;
-	u8 dt[DEFAULT_RNG_BLK_SZ];
-	int ivlen;
-	u64 ctr;
-	spinlock_t lock;
-	struct crypto_queue queue;
-	struct tegra_aes_slot *slots;
-	struct ablkcipher_request *req;
-	size_t total;
-	struct scatterlist *in_sg;
-	size_t in_offset;
-	struct scatterlist *out_sg;
-	size_t out_offset;
-};
-
-static struct tegra_aes_dev *aes_dev;
-
-struct tegra_aes_ctx {
-	struct tegra_aes_dev *dd;
-	unsigned long flags;
-	struct tegra_aes_slot *slot;
-	u8 key[AES_MAX_KEY_SIZE];
-	size_t keylen;
-};
-
-static struct tegra_aes_ctx rng_ctx = {
-	.flags = FLAGS_NEW_KEY,
-	.keylen = AES_KEYSIZE_128,
-};
-
-/* keep registered devices data here */
-static struct list_head dev_list;
-static DEFINE_SPINLOCK(list_lock);
-static DEFINE_MUTEX(aes_lock);
-
-static void aes_workqueue_handler(struct work_struct *work);
-static DECLARE_WORK(aes_work, aes_workqueue_handler);
-static struct workqueue_struct *aes_wq;
-
-static inline u32 aes_readl(struct tegra_aes_dev *dd, u32 offset)
-{
-	return readl(dd->io_base + offset);
-}
-
-static inline void aes_writel(struct tegra_aes_dev *dd, u32 val, u32 offset)
-{
-	writel(val, dd->io_base + offset);
-}
-
-static int aes_start_crypt(struct tegra_aes_dev *dd, u32 in_addr, u32 out_addr,
-	int nblocks, int mode, bool upd_iv)
-{
-	u32 cmdq[AES_HW_MAX_ICQ_LENGTH];
-	int i, eng_busy, icq_empty, ret;
-	u32 value;
-
-	/* reset all the interrupt bits */
-	aes_writel(dd, 0xFFFFFFFF, TEGRA_AES_INTR_STATUS);
-
-	/* enable error, dma xfer complete interrupts */
-	aes_writel(dd, 0x33, TEGRA_AES_INT_ENB);
-
-	cmdq[0] = CMD_DMASETUP << CMDQ_OPCODE_SHIFT;
-	cmdq[1] = in_addr;
-	cmdq[2] = CMD_BLKSTARTENGINE << CMDQ_OPCODE_SHIFT | (nblocks-1);
-	cmdq[3] = CMD_DMACOMPLETE << CMDQ_OPCODE_SHIFT;
-
-	value = aes_readl(dd, TEGRA_AES_CMDQUE_CONTROL);
-	/* access SDRAM through AHB */
-	value &= ~TEGRA_AES_CMDQ_CTRL_SRC_STM_SEL_FIELD;
-	value &= ~TEGRA_AES_CMDQ_CTRL_DST_STM_SEL_FIELD;
-	value |= TEGRA_AES_CMDQ_CTRL_SRC_STM_SEL_FIELD |
-		 TEGRA_AES_CMDQ_CTRL_DST_STM_SEL_FIELD |
-		 TEGRA_AES_CMDQ_CTRL_ICMDQEN_FIELD;
-	aes_writel(dd, value, TEGRA_AES_CMDQUE_CONTROL);
-	dev_dbg(dd->dev, "cmd_q_ctrl=0x%x", value);
-
-	value = (0x1 << TEGRA_AES_SECURE_INPUT_ALG_SEL_SHIFT) |
-		((dd->ctx->keylen * 8) <<
-			TEGRA_AES_SECURE_INPUT_KEY_LEN_SHIFT) |
-		((u32)upd_iv << TEGRA_AES_SECURE_IV_SELECT_SHIFT);
-
-	if (mode & FLAGS_CBC) {
-		value |= ((((mode & FLAGS_ENCRYPT) ? 2 : 3)
-				<< TEGRA_AES_SECURE_XOR_POS_SHIFT) |
-			(((mode & FLAGS_ENCRYPT) ? 2 : 3)
-				<< TEGRA_AES_SECURE_VCTRAM_SEL_SHIFT) |
-			((mode & FLAGS_ENCRYPT) ? 1 : 0)
-				<< TEGRA_AES_SECURE_CORE_SEL_SHIFT);
-	} else if (mode & FLAGS_OFB) {
-		value |= ((TEGRA_AES_SECURE_XOR_POS_FIELD) |
-			(2 << TEGRA_AES_SECURE_INPUT_SEL_SHIFT) |
-			(TEGRA_AES_SECURE_CORE_SEL_FIELD));
-	} else if (mode & FLAGS_RNG) {
-		value |= (((mode & FLAGS_ENCRYPT) ? 1 : 0)
-				<< TEGRA_AES_SECURE_CORE_SEL_SHIFT |
-			  TEGRA_AES_SECURE_RNG_ENB_FIELD);
-	} else {
-		value |= (((mode & FLAGS_ENCRYPT) ? 1 : 0)
-				<< TEGRA_AES_SECURE_CORE_SEL_SHIFT);
-	}
-
-	dev_dbg(dd->dev, "secure_in_sel=0x%x", value);
-	aes_writel(dd, value, TEGRA_AES_SECURE_INPUT_SELECT);
-
-	aes_writel(dd, out_addr, TEGRA_AES_SECURE_DEST_ADDR);
-	reinit_completion(&dd->op_complete);
-
-	for (i = 0; i < AES_HW_MAX_ICQ_LENGTH - 1; i++) {
-		do {
-			value = aes_readl(dd, TEGRA_AES_INTR_STATUS);
-			eng_busy = value & TEGRA_AES_ENGINE_BUSY_FIELD;
-			icq_empty = value & TEGRA_AES_ICQ_EMPTY_FIELD;
-		} while (eng_busy && !icq_empty);
-		aes_writel(dd, cmdq[i], TEGRA_AES_ICMDQUE_WR);
-	}
-
-	ret = wait_for_completion_timeout(&dd->op_complete,
-					  msecs_to_jiffies(150));
-	if (ret == 0) {
-		dev_err(dd->dev, "timed out (0x%x)\n",
-			aes_readl(dd, TEGRA_AES_INTR_STATUS));
-		return -ETIMEDOUT;
-	}
-
-	aes_writel(dd, cmdq[AES_HW_MAX_ICQ_LENGTH - 1], TEGRA_AES_ICMDQUE_WR);
-	return 0;
-}
-
-static void aes_release_key_slot(struct tegra_aes_slot *slot)
-{
-	if (slot->slot_num == SSK_SLOT_NUM)
-		return;
-
-	spin_lock(&list_lock);
-	list_add_tail(&slot->node, &dev_list);
-	slot = NULL;
-	spin_unlock(&list_lock);
-}
-
-static struct tegra_aes_slot *aes_find_key_slot(void)
-{
-	struct tegra_aes_slot *slot = NULL;
-	struct list_head *new_head;
-	int empty;
-
-	spin_lock(&list_lock);
-	empty = list_empty(&dev_list);
-	if (!empty) {
-		slot = list_entry(&dev_list, struct tegra_aes_slot, node);
-		new_head = dev_list.next;
-		list_del(&dev_list);
-		dev_list.next = new_head->next;
-		dev_list.prev = NULL;
-	}
-	spin_unlock(&list_lock);
-
-	return slot;
-}
-
-static int aes_set_key(struct tegra_aes_dev *dd)
-{
-	u32 value, cmdq[2];
-	struct tegra_aes_ctx *ctx = dd->ctx;
-	int eng_busy, icq_empty, dma_busy;
-	bool use_ssk = false;
-
-	/* use ssk? */
-	if (!dd->ctx->slot) {
-		dev_dbg(dd->dev, "using ssk");
-		dd->ctx->slot = &ssk;
-		use_ssk = true;
-	}
-
-	/* enable key schedule generation in hardware */
-	value = aes_readl(dd, TEGRA_AES_SECURE_CONFIG_EXT);
-	value &= ~TEGRA_AES_SECURE_KEY_SCH_DIS_FIELD;
-	aes_writel(dd, value, TEGRA_AES_SECURE_CONFIG_EXT);
-
-	/* select the key slot */
-	value = aes_readl(dd, TEGRA_AES_SECURE_CONFIG);
-	value &= ~TEGRA_AES_SECURE_KEY_INDEX_FIELD;
-	value |= (ctx->slot->slot_num << TEGRA_AES_SECURE_KEY_INDEX_SHIFT);
-	aes_writel(dd, value, TEGRA_AES_SECURE_CONFIG);
-
-	if (use_ssk)
-		return 0;
-
-	/* copy the key table from sdram to vram */
-	cmdq[0] = CMD_MEMDMAVD << CMDQ_OPCODE_SHIFT |
-		MEMDMA_DIR_DTOVRAM << MEMDMA_DIR_SHIFT |
-		AES_HW_KEY_TABLE_LENGTH_BYTES / sizeof(u32) <<
-			MEMDMA_NUM_WORDS_SHIFT;
-	cmdq[1] = (u32)dd->ivkey_phys_base;
-
-	aes_writel(dd, cmdq[0], TEGRA_AES_ICMDQUE_WR);
-	aes_writel(dd, cmdq[1], TEGRA_AES_ICMDQUE_WR);
-
-	do {
-		value = aes_readl(dd, TEGRA_AES_INTR_STATUS);
-		eng_busy = value & TEGRA_AES_ENGINE_BUSY_FIELD;
-		icq_empty = value & TEGRA_AES_ICQ_EMPTY_FIELD;
-		dma_busy = value & TEGRA_AES_DMA_BUSY_FIELD;
-	} while (eng_busy && !icq_empty && dma_busy);
-
-	/* settable command to get key into internal registers */
-	value = CMD_SETTABLE << CMDQ_OPCODE_SHIFT |
-		SUBCMD_CRYPTO_TABLE_SEL << CMDQ_TABLESEL_SHIFT |
-		SUBCMD_VRAM_SEL << CMDQ_VRAMSEL_SHIFT |
-		(SUBCMD_KEY_TABLE_SEL | ctx->slot->slot_num) <<
-			CMDQ_KEYTABLEID_SHIFT;
-	aes_writel(dd, value, TEGRA_AES_ICMDQUE_WR);
-
-	do {
-		value = aes_readl(dd, TEGRA_AES_INTR_STATUS);
-		eng_busy = value & TEGRA_AES_ENGINE_BUSY_FIELD;
-		icq_empty = value & TEGRA_AES_ICQ_EMPTY_FIELD;
-	} while (eng_busy && !icq_empty);
-
-	return 0;
-}
-
-static int tegra_aes_handle_req(struct tegra_aes_dev *dd)
-{
-	struct crypto_async_request *async_req, *backlog;
-	struct crypto_ablkcipher *tfm;
-	struct tegra_aes_ctx *ctx;
-	struct tegra_aes_reqctx *rctx;
-	struct ablkcipher_request *req;
-	unsigned long flags;
-	int dma_max = AES_HW_DMA_BUFFER_SIZE_BYTES;
-	int ret = 0, nblocks, total;
-	int count = 0;
-	dma_addr_t addr_in, addr_out;
-	struct scatterlist *in_sg, *out_sg;
-
-	if (!dd)
-		return -EINVAL;
-
-	spin_lock_irqsave(&dd->lock, flags);
-	backlog = crypto_get_backlog(&dd->queue);
-	async_req = crypto_dequeue_request(&dd->queue);
-	if (!async_req)
-		clear_bit(FLAGS_BUSY, &dd->flags);
-	spin_unlock_irqrestore(&dd->lock, flags);
-
-	if (!async_req)
-		return -ENODATA;
-
-	if (backlog)
-		backlog->complete(backlog, -EINPROGRESS);
-
-	req = ablkcipher_request_cast(async_req);
-
-	dev_dbg(dd->dev, "%s: get new req\n", __func__);
-
-	if (!req->src || !req->dst)
-		return -EINVAL;
-
-	/* take mutex to access the aes hw */
-	mutex_lock(&aes_lock);
-
-	/* assign new request to device */
-	dd->req = req;
-	dd->total = req->nbytes;
-	dd->in_offset = 0;
-	dd->in_sg = req->src;
-	dd->out_offset = 0;
-	dd->out_sg = req->dst;
-
-	in_sg = dd->in_sg;
-	out_sg = dd->out_sg;
-
-	total = dd->total;
-
-	tfm = crypto_ablkcipher_reqtfm(req);
-	rctx = ablkcipher_request_ctx(req);
-	ctx = crypto_ablkcipher_ctx(tfm);
-	rctx->mode &= FLAGS_MODE_MASK;
-	dd->flags = (dd->flags & ~FLAGS_MODE_MASK) | rctx->mode;
-
-	dd->iv = (u8 *)req->info;
-	dd->ivlen = crypto_ablkcipher_ivsize(tfm);
-
-	/* assign new context to device */
-	ctx->dd = dd;
-	dd->ctx = ctx;
-
-	if (ctx->flags & FLAGS_NEW_KEY) {
-		/* copy the key */
-		memcpy(dd->ivkey_base, ctx->key, ctx->keylen);
-		memset(dd->ivkey_base + ctx->keylen, 0, AES_HW_KEY_TABLE_LENGTH_BYTES - ctx->keylen);
-		aes_set_key(dd);
-		ctx->flags &= ~FLAGS_NEW_KEY;
-	}
-
-	if (((dd->flags & FLAGS_CBC) || (dd->flags & FLAGS_OFB)) && dd->iv) {
-		/* set iv to the aes hw slot
-		 * Hw generates updated iv only after iv is set in slot.
-		 * So key and iv is passed asynchronously.
-		 */
-		memcpy(dd->buf_in, dd->iv, dd->ivlen);
-
-		ret = aes_start_crypt(dd, (u32)dd->dma_buf_in,
-				      dd->dma_buf_out, 1, FLAGS_CBC, false);
-		if (ret < 0) {
-			dev_err(dd->dev, "aes_start_crypt fail(%d)\n", ret);
-			goto out;
-		}
-	}
-
-	while (total) {
-		dev_dbg(dd->dev, "remain: %d\n", total);
-		ret = dma_map_sg(dd->dev, in_sg, 1, DMA_TO_DEVICE);
-		if (!ret) {
-			dev_err(dd->dev, "dma_map_sg() error\n");
-			goto out;
-		}
-
-		ret = dma_map_sg(dd->dev, out_sg, 1, DMA_FROM_DEVICE);
-		if (!ret) {
-			dev_err(dd->dev, "dma_map_sg() error\n");
-			dma_unmap_sg(dd->dev, dd->in_sg,
-				1, DMA_TO_DEVICE);
-			goto out;
-		}
-
-		addr_in = sg_dma_address(in_sg);
-		addr_out = sg_dma_address(out_sg);
-		dd->flags |= FLAGS_FAST;
-		count = min_t(int, sg_dma_len(in_sg), dma_max);
-		WARN_ON(sg_dma_len(in_sg) != sg_dma_len(out_sg));
-		nblocks = DIV_ROUND_UP(count, AES_BLOCK_SIZE);
-
-		ret = aes_start_crypt(dd, addr_in, addr_out, nblocks,
-			dd->flags, true);
-
-		dma_unmap_sg(dd->dev, out_sg, 1, DMA_FROM_DEVICE);
-		dma_unmap_sg(dd->dev, in_sg, 1, DMA_TO_DEVICE);
-
-		if (ret < 0) {
-			dev_err(dd->dev, "aes_start_crypt fail(%d)\n", ret);
-			goto out;
-		}
-		dd->flags &= ~FLAGS_FAST;
-
-		dev_dbg(dd->dev, "out: copied %d\n", count);
-		total -= count;
-		in_sg = sg_next(in_sg);
-		out_sg = sg_next(out_sg);
-		WARN_ON(((total != 0) && (!in_sg || !out_sg)));
-	}
-
-out:
-	mutex_unlock(&aes_lock);
-
-	dd->total = total;
-
-	if (dd->req->base.complete)
-		dd->req->base.complete(&dd->req->base, ret);
-
-	dev_dbg(dd->dev, "%s: exit\n", __func__);
-	return ret;
-}
-
-static int tegra_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
-			    unsigned int keylen)
-{
-	struct tegra_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	struct tegra_aes_dev *dd = aes_dev;
-	struct tegra_aes_slot *key_slot;
-
-	if ((keylen != AES_KEYSIZE_128) && (keylen != AES_KEYSIZE_192) &&
-		(keylen != AES_KEYSIZE_256)) {
-		dev_err(dd->dev, "unsupported key size\n");
-		crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
-
-	dev_dbg(dd->dev, "keylen: %d\n", keylen);
-
-	ctx->dd = dd;
-
-	if (key) {
-		if (!ctx->slot) {
-			key_slot = aes_find_key_slot();
-			if (!key_slot) {
-				dev_err(dd->dev, "no empty slot\n");
-				return -ENOMEM;
-			}
-
-			ctx->slot = key_slot;
-		}
-
-		memcpy(ctx->key, key, keylen);
-		ctx->keylen = keylen;
-	}
-
-	ctx->flags |= FLAGS_NEW_KEY;
-	dev_dbg(dd->dev, "done\n");
-	return 0;
-}
-
-static void aes_workqueue_handler(struct work_struct *work)
-{
-	struct tegra_aes_dev *dd = aes_dev;
-	int ret;
-
-	ret = clk_prepare_enable(dd->aes_clk);
-	if (ret)
-		BUG_ON("clock enable failed");
-
-	/* empty the crypto queue and then return */
-	do {
-		ret = tegra_aes_handle_req(dd);
-	} while (!ret);
-
-	clk_disable_unprepare(dd->aes_clk);
-}
-
-static irqreturn_t aes_irq(int irq, void *dev_id)
-{
-	struct tegra_aes_dev *dd = (struct tegra_aes_dev *)dev_id;
-	u32 value = aes_readl(dd, TEGRA_AES_INTR_STATUS);
-	int busy = test_bit(FLAGS_BUSY, &dd->flags);
-
-	if (!busy) {
-		dev_dbg(dd->dev, "spurious interrupt\n");
-		return IRQ_NONE;
-	}
-
-	dev_dbg(dd->dev, "irq_stat: 0x%x\n", value);
-	if (value & TEGRA_AES_INT_ERROR_MASK)
-		aes_writel(dd, TEGRA_AES_INT_ERROR_MASK, TEGRA_AES_INTR_STATUS);
-
-	if (!(value & TEGRA_AES_ENGINE_BUSY_FIELD))
-		complete(&dd->op_complete);
-	else
-		return IRQ_NONE;
-
-	return IRQ_HANDLED;
-}
-
-static int tegra_aes_crypt(struct ablkcipher_request *req, unsigned long mode)
-{
-	struct tegra_aes_reqctx *rctx = ablkcipher_request_ctx(req);
-	struct tegra_aes_dev *dd = aes_dev;
-	unsigned long flags;
-	int err = 0;
-	int busy;
-
-	dev_dbg(dd->dev, "nbytes: %d, enc: %d, cbc: %d, ofb: %d\n",
-		req->nbytes, !!(mode & FLAGS_ENCRYPT),
-		!!(mode & FLAGS_CBC), !!(mode & FLAGS_OFB));
-
-	rctx->mode = mode;
-
-	spin_lock_irqsave(&dd->lock, flags);
-	err = ablkcipher_enqueue_request(&dd->queue, req);
-	busy = test_and_set_bit(FLAGS_BUSY, &dd->flags);
-	spin_unlock_irqrestore(&dd->lock, flags);
-
-	if (!busy)
-		queue_work(aes_wq, &aes_work);
-
-	return err;
-}
-
-static int tegra_aes_ecb_encrypt(struct ablkcipher_request *req)
-{
-	return tegra_aes_crypt(req, FLAGS_ENCRYPT);
-}
-
-static int tegra_aes_ecb_decrypt(struct ablkcipher_request *req)
-{
-	return tegra_aes_crypt(req, 0);
-}
-
-static int tegra_aes_cbc_encrypt(struct ablkcipher_request *req)
-{
-	return tegra_aes_crypt(req, FLAGS_ENCRYPT | FLAGS_CBC);
-}
-
-static int tegra_aes_cbc_decrypt(struct ablkcipher_request *req)
-{
-	return tegra_aes_crypt(req, FLAGS_CBC);
-}
-
-static int tegra_aes_ofb_encrypt(struct ablkcipher_request *req)
-{
-	return tegra_aes_crypt(req, FLAGS_ENCRYPT | FLAGS_OFB);
-}
-
-static int tegra_aes_ofb_decrypt(struct ablkcipher_request *req)
-{
-	return tegra_aes_crypt(req, FLAGS_OFB);
-}
-
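The six entrypoints differ only in the flag mask handed to tegra_aes_crypt(),
with decryption expressed as the absence of FLAGS_ENCRYPT:

	/*
	 * Mode dispatch used above:
	 *   ecb(aes)  encrypt: FLAGS_ENCRYPT              decrypt: 0
	 *   cbc(aes)  encrypt: FLAGS_ENCRYPT | FLAGS_CBC  decrypt: FLAGS_CBC
	 *   ofb(aes)  encrypt: FLAGS_ENCRYPT | FLAGS_OFB  decrypt: FLAGS_OFB
	 */
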
-static int tegra_aes_get_random(struct crypto_rng *tfm, u8 *rdata,
-				unsigned int dlen)
-{
-	struct tegra_aes_dev *dd = aes_dev;
-	struct tegra_aes_ctx *ctx = &rng_ctx;
-	int ret, i;
-	u8 *dest = rdata, *dt = dd->dt;
-
-	/* take mutex to access the aes hw */
-	mutex_lock(&aes_lock);
-
-	ret = clk_prepare_enable(dd->aes_clk);
-	if (ret) {
-		mutex_unlock(&aes_lock);
-		return ret;
-	}
-
-	ctx->dd = dd;
-	dd->ctx = ctx;
-	dd->flags = FLAGS_ENCRYPT | FLAGS_RNG;
-
-	memcpy(dd->buf_in, dt, DEFAULT_RNG_BLK_SZ);
-
-	ret = aes_start_crypt(dd, (u32)dd->dma_buf_in,
-			      (u32)dd->dma_buf_out, 1, dd->flags, true);
-	if (ret < 0) {
-		dev_err(dd->dev, "aes_start_crypt fail(%d)\n", ret);
-		dlen = ret;
-		goto out;
-	}
-	memcpy(dest, dd->buf_out, dlen);
-
-	/* update the DT */
-	for (i = DEFAULT_RNG_BLK_SZ - 1; i >= 0; i--) {
-		dt[i] += 1;
-		if (dt[i] != 0)
-			break;
-	}
-
-out:
-	clk_disable_unprepare(dd->aes_clk);
-	mutex_unlock(&aes_lock);
-
-	dev_dbg(dd->dev, "%s: done\n", __func__);
-	return dlen;
-}
-
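After each read, tegra_aes_get_random() bumps the DT vector as a big-endian
counter: increment the last byte and carry into earlier bytes only on
wraparound. The same loop in isolation, easy to test in userspace:

	/* Sketch: big-endian increment of an n-byte counter block. */
	static void dt_increment(u8 *dt, int n)
	{
		int i;

		for (i = n - 1; i >= 0; i--)
			if (++dt[i] != 0)	/* stop once there is no carry */
				break;
	}
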
-static int tegra_aes_rng_reset(struct crypto_rng *tfm, u8 *seed,
-			       unsigned int slen)
-{
-	struct tegra_aes_dev *dd = aes_dev;
-	struct tegra_aes_ctx *ctx = &rng_ctx;
-	struct tegra_aes_slot *key_slot;
-	int ret = 0;
-	u8 tmp[16]; /* 16 bytes = 128 bits of entropy */
-	u8 *dt;
-
-	if (!ctx || !dd) {
-		pr_err("ctx=0x%x, dd=0x%x\n",
-			(unsigned int)ctx, (unsigned int)dd);
-		return -EINVAL;
-	}
-
-	if (slen < (DEFAULT_RNG_BLK_SZ + AES_KEYSIZE_128)) {
-		dev_err(dd->dev, "seed size invalid");
-		return -ENOMEM;
-	}
-
-	/* take mutex to access the aes hw */
-	mutex_lock(&aes_lock);
-
-	if (!ctx->slot) {
-		key_slot = aes_find_key_slot();
-		if (!key_slot) {
-			dev_err(dd->dev, "no empty slot\n");
-			mutex_unlock(&aes_lock);
-			return -ENOMEM;
-		}
-		ctx->slot = key_slot;
-	}
-
-	ctx->dd = dd;
-	dd->ctx = ctx;
-	dd->ctr = 0;
-
-	ctx->keylen = AES_KEYSIZE_128;
-	ctx->flags |= FLAGS_NEW_KEY;
-
-	/* copy the key to the key slot */
-	memcpy(dd->ivkey_base, seed + DEFAULT_RNG_BLK_SZ, AES_KEYSIZE_128);
-	memset(dd->ivkey_base + AES_KEYSIZE_128, 0, AES_HW_KEY_TABLE_LENGTH_BYTES - AES_KEYSIZE_128);
-
-	dd->iv = seed;
-	dd->ivlen = slen;
-
-	dd->flags = FLAGS_ENCRYPT | FLAGS_RNG;
-
-	ret = clk_prepare_enable(dd->aes_clk);
-	if (ret) {
-		mutex_unlock(&aes_lock);
-		return ret;
-	}
-
-	aes_set_key(dd);
-
-	/* set seed to the aes hw slot */
-	memcpy(dd->buf_in, dd->iv, DEFAULT_RNG_BLK_SZ);
-	ret = aes_start_crypt(dd, (u32)dd->dma_buf_in,
-			      dd->dma_buf_out, 1, FLAGS_CBC, false);
-	if (ret < 0) {
-		dev_err(dd->dev, "aes_start_crypt fail(%d)\n", ret);
-		goto out;
-	}
-
-	if (dd->ivlen >= (2 * DEFAULT_RNG_BLK_SZ + AES_KEYSIZE_128)) {
-		dt = dd->iv + DEFAULT_RNG_BLK_SZ + AES_KEYSIZE_128;
-	} else {
-		get_random_bytes(tmp, sizeof(tmp));
-		dt = tmp;
-	}
-	memcpy(dd->dt, dt, DEFAULT_RNG_BLK_SZ);
-
-out:
-	clk_disable_unprepare(dd->aes_clk);
-	mutex_unlock(&aes_lock);
-
-	dev_dbg(dd->dev, "%s: done\n", __func__);
-	return ret;
-}
-
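The reset path expects a seed of at least DEFAULT_RNG_BLK_SZ + AES_KEYSIZE_128
bytes laid out as V | K, with an optional trailing DT block; without the
trailing block the DT falls back to get_random_bytes(). A sketch of unpacking
that layout, assuming the 16/16/16 split the size constants imply:

	/* Sketch: seed layout V(16) | K(16) | [DT(16)]; offsets assumed. */
	static void sketch_seed_unpack(const u8 *seed, unsigned int slen,
				       u8 *v, u8 *k, u8 *dt)
	{
		memcpy(v, seed, 16);		/* initial vector */
		memcpy(k, seed + 16, 16);	/* AES-128 key */
		if (slen >= 48)
			memcpy(dt, seed + 32, 16); /* else caller randomizes */
	}
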
-static int tegra_aes_cra_init(struct crypto_tfm *tfm)
-{
-	tfm->crt_ablkcipher.reqsize = sizeof(struct tegra_aes_reqctx);
-
-	return 0;
-}
-
-static void tegra_aes_cra_exit(struct crypto_tfm *tfm)
-{
-	struct tegra_aes_ctx *ctx =
-		crypto_ablkcipher_ctx((struct crypto_ablkcipher *)tfm);
-
-	if (ctx && ctx->slot)
-		aes_release_key_slot(ctx->slot);
-}
-
-static struct crypto_alg algs[] = {
-	{
-		.cra_name = "ecb(aes)",
-		.cra_driver_name = "ecb-aes-tegra",
-		.cra_priority = 300,
-		.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-		.cra_blocksize = AES_BLOCK_SIZE,
-		.cra_alignmask = 3,
-		.cra_type = &crypto_ablkcipher_type,
-		.cra_u.ablkcipher = {
-			.min_keysize = AES_MIN_KEY_SIZE,
-			.max_keysize = AES_MAX_KEY_SIZE,
-			.setkey = tegra_aes_setkey,
-			.encrypt = tegra_aes_ecb_encrypt,
-			.decrypt = tegra_aes_ecb_decrypt,
-		},
-	}, {
-		.cra_name = "cbc(aes)",
-		.cra_driver_name = "cbc-aes-tegra",
-		.cra_priority = 300,
-		.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-		.cra_blocksize = AES_BLOCK_SIZE,
-		.cra_alignmask = 3,
-		.cra_type = &crypto_ablkcipher_type,
-		.cra_u.ablkcipher = {
-			.min_keysize = AES_MIN_KEY_SIZE,
-			.max_keysize = AES_MAX_KEY_SIZE,
-			.ivsize = AES_MIN_KEY_SIZE,
-			.setkey = tegra_aes_setkey,
-			.encrypt = tegra_aes_cbc_encrypt,
-			.decrypt = tegra_aes_cbc_decrypt,
-		}
-	}, {
-		.cra_name = "ofb(aes)",
-		.cra_driver_name = "ofb-aes-tegra",
-		.cra_priority = 300,
-		.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-		.cra_blocksize = AES_BLOCK_SIZE,
-		.cra_alignmask = 3,
-		.cra_type = &crypto_ablkcipher_type,
-		.cra_u.ablkcipher = {
-			.min_keysize = AES_MIN_KEY_SIZE,
-			.max_keysize = AES_MAX_KEY_SIZE,
-			.ivsize = AES_MIN_KEY_SIZE,
-			.setkey = tegra_aes_setkey,
-			.encrypt = tegra_aes_ofb_encrypt,
-			.decrypt = tegra_aes_ofb_decrypt,
-		}
-	}, {
-		.cra_name = "ansi_cprng",
-		.cra_driver_name = "rng-aes-tegra",
-		.cra_flags = CRYPTO_ALG_TYPE_RNG,
-		.cra_ctxsize = sizeof(struct tegra_aes_ctx),
-		.cra_type = &crypto_rng_type,
-		.cra_u.rng = {
-			.rng_make_random = tegra_aes_get_random,
-			.rng_reset = tegra_aes_rng_reset,
-			.seedsize = AES_KEYSIZE_128 + (2 * DEFAULT_RNG_BLK_SZ),
-		}
-	}
-};
-
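The table registers three ablkcipher modes plus an ansi_cprng provider;
probe() below fills in the shared cra_* fields before registration. A kernel
user would reach, say, the CBC instance by name (sketch; only the allocation
is checked):

	/* Sketch: allocating this driver's cbc(aes) by its driver name. */
	struct crypto_ablkcipher *tfm;

	tfm = crypto_alloc_ablkcipher("cbc-aes-tegra", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);
	/* ... crypto_ablkcipher_setkey() and requests go here ... */
	crypto_free_ablkcipher(tfm);
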
-static int tegra_aes_probe(struct platform_device *pdev)
-{
-	struct device *dev = &pdev->dev;
-	struct tegra_aes_dev *dd;
-	struct resource *res;
-	int err = -ENOMEM, i = 0, j;
-
-	dd = devm_kzalloc(dev, sizeof(struct tegra_aes_dev), GFP_KERNEL);
-	if (dd == NULL) {
-		dev_err(dev, "unable to alloc data struct.\n");
-		return err;
-	}
-
-	dd->dev = dev;
-	platform_set_drvdata(pdev, dd);
-
-	dd->slots = devm_kzalloc(dev, sizeof(struct tegra_aes_slot) *
-				 AES_NR_KEYSLOTS, GFP_KERNEL);
-	if (dd->slots == NULL) {
-		dev_err(dev, "unable to alloc slot struct.\n");
-		goto out;
-	}
-
-	spin_lock_init(&dd->lock);
-	crypto_init_queue(&dd->queue, TEGRA_AES_QUEUE_LENGTH);
-
-	/* Get the module base address */
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		dev_err(dev, "invalid resource type: base\n");
-		err = -ENODEV;
-		goto out;
-	}
-
-	if (!devm_request_mem_region(&pdev->dev, res->start,
-				     resource_size(res),
-				     dev_name(&pdev->dev))) {
-		dev_err(&pdev->dev, "Couldn't request MEM resource\n");
-		return -ENODEV;
-	}
-
-	dd->io_base = devm_ioremap(dev, res->start, resource_size(res));
-	if (!dd->io_base) {
-		dev_err(dev, "can't ioremap register space\n");
-		err = -ENOMEM;
-		goto out;
-	}
-
-	/* Initialize the vde clock */
-	dd->aes_clk = devm_clk_get(dev, "vde");
-	if (IS_ERR(dd->aes_clk)) {
-		dev_err(dev, "iclock intialization failed.\n");
-		err = -ENODEV;
-		goto out;
-	}
-
-	err = clk_set_rate(dd->aes_clk, ULONG_MAX);
-	if (err) {
-		dev_err(dd->dev, "iclk set_rate fail(%d)\n", err);
-		goto out;
-	}
-
-	/*
-	 * the foll contiguous memory is allocated as follows -
-	 * - hardware key table
-	 * - key schedule
-	 */
-	dd->ivkey_base = dma_alloc_coherent(dev, AES_HW_KEY_TABLE_LENGTH_BYTES,
-					    &dd->ivkey_phys_base,
-		GFP_KERNEL);
-	if (!dd->ivkey_base) {
-		dev_err(dev, "can not allocate iv/key buffer\n");
-		err = -ENOMEM;
-		goto out;
-	}
-
-	dd->buf_in = dma_alloc_coherent(dev, AES_HW_DMA_BUFFER_SIZE_BYTES,
-					&dd->dma_buf_in, GFP_KERNEL);
-	if (!dd->buf_in) {
-		dev_err(dev, "can not allocate dma-in buffer\n");
-		err = -ENOMEM;
-		goto out;
-	}
-
-	dd->buf_out = dma_alloc_coherent(dev, AES_HW_DMA_BUFFER_SIZE_BYTES,
-					 &dd->dma_buf_out, GFP_KERNEL);
-	if (!dd->buf_out) {
-		dev_err(dev, "can not allocate dma-out buffer\n");
-		err = -ENOMEM;
-		goto out;
-	}
-
-	init_completion(&dd->op_complete);
-	aes_wq = alloc_workqueue("tegra_aes_wq", WQ_HIGHPRI | WQ_UNBOUND, 1);
-	if (!aes_wq) {
-		dev_err(dev, "alloc_workqueue failed\n");
-		err = -ENOMEM;
-		goto out;
-	}
-
-	/* get the irq */
-	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
-	if (!res) {
-		dev_err(dev, "invalid resource type: base\n");
-		err = -ENODEV;
-		goto out;
-	}
-	dd->irq = res->start;
-
-	err = devm_request_irq(dev, dd->irq, aes_irq, IRQF_TRIGGER_HIGH |
-				IRQF_SHARED, "tegra-aes", dd);
-	if (err) {
-		dev_err(dev, "request_irq failed\n");
-		goto out;
-	}
-
-	mutex_init(&aes_lock);
-	INIT_LIST_HEAD(&dev_list);
-
-	spin_lock_init(&list_lock);
-	spin_lock(&list_lock);
-	for (i = 0; i < AES_NR_KEYSLOTS; i++) {
-		if (i == SSK_SLOT_NUM)
-			continue;
-		dd->slots[i].slot_num = i;
-		INIT_LIST_HEAD(&dd->slots[i].node);
-		list_add_tail(&dd->slots[i].node, &dev_list);
-	}
-	spin_unlock(&list_lock);
-
-	aes_dev = dd;
-	for (i = 0; i < ARRAY_SIZE(algs); i++) {
-		algs[i].cra_priority = 300;
-		algs[i].cra_ctxsize = sizeof(struct tegra_aes_ctx);
-		algs[i].cra_module = THIS_MODULE;
-		algs[i].cra_init = tegra_aes_cra_init;
-		algs[i].cra_exit = tegra_aes_cra_exit;
-
-		err = crypto_register_alg(&algs[i]);
-		if (err)
-			goto out;
-	}
-
-	dev_info(dev, "registered");
-	return 0;
-
-out:
-	for (j = 0; j < i; j++)
-		crypto_unregister_alg(&algs[j]);
-	if (dd->ivkey_base)
-		dma_free_coherent(dev, AES_HW_KEY_TABLE_LENGTH_BYTES,
-			dd->ivkey_base, dd->ivkey_phys_base);
-	if (dd->buf_in)
-		dma_free_coherent(dev, AES_HW_DMA_BUFFER_SIZE_BYTES,
-			dd->buf_in, dd->dma_buf_in);
-	if (dd->buf_out)
-		dma_free_coherent(dev, AES_HW_DMA_BUFFER_SIZE_BYTES,
-			dd->buf_out, dd->dma_buf_out);
-	if (aes_wq)
-		destroy_workqueue(aes_wq);
-	spin_lock(&list_lock);
-	list_del(&dev_list);
-	spin_unlock(&list_lock);
-
-	aes_dev = NULL;
-
-	dev_err(dev, "%s: initialization failed.\n", __func__);
-	return err;
-}
-
-static int tegra_aes_remove(struct platform_device *pdev)
-{
-	struct device *dev = &pdev->dev;
-	struct tegra_aes_dev *dd = platform_get_drvdata(pdev);
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(algs); i++)
-		crypto_unregister_alg(&algs[i]);
-
-	cancel_work_sync(&aes_work);
-	destroy_workqueue(aes_wq);
-	spin_lock(&list_lock);
-	list_del(&dev_list);
-	spin_unlock(&list_lock);
-
-	dma_free_coherent(dev, AES_HW_KEY_TABLE_LENGTH_BYTES,
-			  dd->ivkey_base, dd->ivkey_phys_base);
-	dma_free_coherent(dev, AES_HW_DMA_BUFFER_SIZE_BYTES,
-			  dd->buf_in, dd->dma_buf_in);
-	dma_free_coherent(dev, AES_HW_DMA_BUFFER_SIZE_BYTES,
-			  dd->buf_out, dd->dma_buf_out);
-	aes_dev = NULL;
-
-	return 0;
-}
-
-static struct of_device_id tegra_aes_of_match[] = {
-	{ .compatible = "nvidia,tegra20-aes", },
-	{ .compatible = "nvidia,tegra30-aes", },
-	{ },
-};
-
-static struct platform_driver tegra_aes_driver = {
-	.probe  = tegra_aes_probe,
-	.remove = tegra_aes_remove,
-	.driver = {
-		.name   = "tegra-aes",
-		.owner  = THIS_MODULE,
-		.of_match_table = tegra_aes_of_match,
-	},
-};
-
-module_platform_driver(tegra_aes_driver);
-
-MODULE_DESCRIPTION("Tegra AES/OFB/CPRNG hw acceleration support.");
-MODULE_AUTHOR("NVIDIA Corporation");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/crypto/tegra-aes.h b/drivers/crypto/tegra-aes.h
deleted file mode 100644
index 6006333..0000000
--- a/drivers/crypto/tegra-aes.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2010, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- */
-
-#ifndef __CRYPTODEV_TEGRA_AES_H
-#define __CRYPTODEV_TEGRA_AES_H
-
-#define TEGRA_AES_ICMDQUE_WR			0x1000
-#define TEGRA_AES_CMDQUE_CONTROL		0x1008
-#define TEGRA_AES_INTR_STATUS			0x1018
-#define TEGRA_AES_INT_ENB			0x1040
-#define TEGRA_AES_CONFIG			0x1044
-#define TEGRA_AES_IRAM_ACCESS_CFG		0x10A0
-#define TEGRA_AES_SECURE_DEST_ADDR		0x1100
-#define TEGRA_AES_SECURE_INPUT_SELECT		0x1104
-#define TEGRA_AES_SECURE_CONFIG			0x1108
-#define TEGRA_AES_SECURE_CONFIG_EXT		0x110C
-#define TEGRA_AES_SECURE_SECURITY		0x1110
-#define TEGRA_AES_SECURE_HASH_RESULT0		0x1120
-#define TEGRA_AES_SECURE_HASH_RESULT1		0x1124
-#define TEGRA_AES_SECURE_HASH_RESULT2		0x1128
-#define TEGRA_AES_SECURE_HASH_RESULT3		0x112C
-#define TEGRA_AES_SECURE_SEC_SEL0		0x1140
-#define TEGRA_AES_SECURE_SEC_SEL1		0x1144
-#define TEGRA_AES_SECURE_SEC_SEL2		0x1148
-#define TEGRA_AES_SECURE_SEC_SEL3		0x114C
-#define TEGRA_AES_SECURE_SEC_SEL4		0x1150
-#define TEGRA_AES_SECURE_SEC_SEL5		0x1154
-#define TEGRA_AES_SECURE_SEC_SEL6		0x1158
-#define TEGRA_AES_SECURE_SEC_SEL7		0x115C
-
-/* interrupt status reg masks and shifts */
-#define TEGRA_AES_ENGINE_BUSY_FIELD		BIT(0)
-#define TEGRA_AES_ICQ_EMPTY_FIELD		BIT(3)
-#define TEGRA_AES_DMA_BUSY_FIELD		BIT(23)
-
-/* secure select reg masks and shifts */
-#define TEGRA_AES_SECURE_SEL0_KEYREAD_ENB0_FIELD	BIT(0)
-
-/* secure config ext masks and shifts */
-#define TEGRA_AES_SECURE_KEY_SCH_DIS_FIELD	BIT(15)
-
-/* secure config masks and shifts */
-#define TEGRA_AES_SECURE_KEY_INDEX_SHIFT	20
-#define TEGRA_AES_SECURE_KEY_INDEX_FIELD	(0x1F << TEGRA_AES_SECURE_KEY_INDEX_SHIFT)
-#define TEGRA_AES_SECURE_BLOCK_CNT_SHIFT	0
-#define TEGRA_AES_SECURE_BLOCK_CNT_FIELD	(0xFFFFF << TEGRA_AES_SECURE_BLOCK_CNT_SHIFT)
-
-/* stream interface select masks and shifts */
-#define TEGRA_AES_CMDQ_CTRL_UCMDQEN_FIELD	BIT(0)
-#define TEGRA_AES_CMDQ_CTRL_ICMDQEN_FIELD	BIT(1)
-#define TEGRA_AES_CMDQ_CTRL_SRC_STM_SEL_FIELD	BIT(4)
-#define TEGRA_AES_CMDQ_CTRL_DST_STM_SEL_FIELD	BIT(5)
-
-/* config register masks and shifts */
-#define TEGRA_AES_CONFIG_ENDIAN_ENB_FIELD	BIT(10)
-#define TEGRA_AES_CONFIG_MODE_SEL_SHIFT		0
-#define TEGRA_AES_CONFIG_MODE_SEL_FIELD		(0x1F << TEGRA_AES_CONFIG_MODE_SEL_SHIFT)
-
-/* extended config */
-#define TEGRA_AES_SECURE_OFFSET_CNT_SHIFT	24
-#define TEGRA_AES_SECURE_OFFSET_CNT_FIELD	(0xFF << TEGRA_AES_SECURE_OFFSET_CNT_SHIFT)
-#define TEGRA_AES_SECURE_KEYSCHED_GEN_FIELD	BIT(15)
-
-/* init vector select */
-#define TEGRA_AES_SECURE_IV_SELECT_SHIFT	10
-#define TEGRA_AES_SECURE_IV_SELECT_FIELD	BIT(10)
-
-/* secure engine input */
-#define TEGRA_AES_SECURE_INPUT_ALG_SEL_SHIFT	28
-#define TEGRA_AES_SECURE_INPUT_ALG_SEL_FIELD	(0xF << TEGRA_AES_SECURE_INPUT_ALG_SEL_SHIFT)
-#define TEGRA_AES_SECURE_INPUT_KEY_LEN_SHIFT	16
-#define TEGRA_AES_SECURE_INPUT_KEY_LEN_FIELD	(0xFFF << TEGRA_AES_SECURE_INPUT_KEY_LEN_SHIFT)
-#define TEGRA_AES_SECURE_RNG_ENB_FIELD		BIT(11)
-#define TEGRA_AES_SECURE_CORE_SEL_SHIFT		9
-#define TEGRA_AES_SECURE_CORE_SEL_FIELD		BIT(9)
-#define TEGRA_AES_SECURE_VCTRAM_SEL_SHIFT	7
-#define TEGRA_AES_SECURE_VCTRAM_SEL_FIELD	(0x3 << TEGRA_AES_SECURE_VCTRAM_SEL_SHIFT)
-#define TEGRA_AES_SECURE_INPUT_SEL_SHIFT	5
-#define TEGRA_AES_SECURE_INPUT_SEL_FIELD	(0x3 << TEGRA_AES_SECURE_INPUT_SEL_SHIFT)
-#define TEGRA_AES_SECURE_XOR_POS_SHIFT		3
-#define TEGRA_AES_SECURE_XOR_POS_FIELD		(0x3 << TEGRA_AES_SECURE_XOR_POS_SHIFT)
-#define TEGRA_AES_SECURE_HASH_ENB_FIELD		BIT(2)
-#define TEGRA_AES_SECURE_ON_THE_FLY_FIELD	BIT(0)
-
-/* interrupt error mask */
-#define TEGRA_AES_INT_ERROR_MASK		0xFFF000
-
-#endif
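Every multi-bit register field above comes as a _SHIFT/_FIELD pair, the usual
kernel read-modify-write idiom. A sketch of using one such pair, assuming the
driver's aes_readl()/aes_writel() accessors:

	/* Sketch: select a key slot without disturbing neighbouring bits. */
	static void sketch_set_key_index(struct tegra_aes_dev *dd, u32 slot)
	{
		u32 cfg = aes_readl(dd, TEGRA_AES_SECURE_CONFIG);

		cfg &= ~TEGRA_AES_SECURE_KEY_INDEX_FIELD;
		cfg |= (slot << TEGRA_AES_SECURE_KEY_INDEX_SHIFT) &
		       TEGRA_AES_SECURE_KEY_INDEX_FIELD;
		aes_writel(dd, cfg, TEGRA_AES_SECURE_CONFIG);
	}
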
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index e73c19e..016c2f1 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -100,9 +100,12 @@
 	void *page;
 	u8 *buffer;
 	u8 *iv;
+	unsigned int ivsize;
 
 	int flags;
-	unsigned int blocksize;
+	unsigned int walk_blocksize;
+	unsigned int cipher_blocksize;
+	unsigned int alignmask;
 };
 
 struct ablkcipher_walk {
@@ -192,6 +195,10 @@
 int blkcipher_walk_virt_block(struct blkcipher_desc *desc,
 			      struct blkcipher_walk *walk,
 			      unsigned int blocksize);
+int blkcipher_aead_walk_virt_block(struct blkcipher_desc *desc,
+				   struct blkcipher_walk *walk,
+				   struct crypto_aead *tfm,
+				   unsigned int blocksize);
 
 int ablkcipher_walk_done(struct ablkcipher_request *req,
 			 struct ablkcipher_walk *walk, int err);
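This hunk decouples the walk from the transform: the walk now carries its own
ivsize, alignmask and a walk_blocksize separate from cipher_blocksize, which
is what allows the new blkcipher_aead_walk_virt_block() to drive a walk from
an AEAD transform. A hedged caller sketch (req, aead and the loop body are
illustrative, not any in-tree driver verbatim):

	/* Sketch: walking an AEAD request's payload block by block. */
	struct blkcipher_desc desc = { .info = req->iv };
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, req->dst, req->src, req->cryptlen);
	err = blkcipher_aead_walk_virt_block(&desc, &walk, aead,
					     AES_BLOCK_SIZE);
	while (walk.nbytes) {
		/* process walk.src.virt.addr into walk.dst.virt.addr */
		err = blkcipher_walk_done(&desc, &walk, 0);
	}
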
diff --git a/include/crypto/null.h b/include/crypto/null.h
new file mode 100644
index 0000000..b7c864c
--- /dev/null
+++ b/include/crypto/null.h
@@ -0,0 +1,11 @@
+/* Values for NULL algorithms */
+
+#ifndef _CRYPTO_NULL_H
+#define _CRYPTO_NULL_H
+
+#define NULL_KEY_SIZE		0
+#define NULL_BLOCK_SIZE		1
+#define NULL_DIGEST_SIZE	0
+#define NULL_IV_SIZE		0
+
+#endif
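With a block size of 1 and zero-length key, IV and digest, the null
algorithms are plain copies (or empty digests); exporting the sizes lets
other code instantiate them by name with matching parameters. A minimal
sketch:

	/* Sketch: a null-cipher tfm; "encryption" is a straight copy. */
	struct crypto_blkcipher *tfm;

	tfm = crypto_alloc_blkcipher("ecb(cipher_null)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);
	/* NULL_BLOCK_SIZE == 1, so any request length is block-aligned */
	crypto_free_blkcipher(tfm);
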
diff --git a/include/linux/ccp.h b/include/linux/ccp.h
index b941ab9..ebcc9d1 100644
--- a/include/linux/ccp.h
+++ b/include/linux/ccp.h
@@ -232,6 +232,9 @@
  * @ctx_len: length in bytes of hash value
  * @src: data to be used for this operation
  * @src_len: length in bytes of data used for this operation
+ * @opad: data to be used for final HMAC operation
+ * @opad_len: length in bytes of data used for final HMAC operation
+ * @first: indicates first SHA operation
  * @final: indicates final SHA operation
  * @msg_bits: total length of the message in bits used in final SHA operation
  *
@@ -251,6 +254,10 @@
 	struct scatterlist *src;
 	u64 src_len;		/* In bytes */
 
+	struct scatterlist *opad;
+	u32 opad_len;		/* In bytes */
+
+	u32 first;		/* Indicates first sha cmd */
 	u32 final;		/* Indicates final sha cmd */
 	u64 msg_bits;		/* Message length in bits required for
 				 * final sha cmd */
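The added fields let multi-part HMAC run on the CCP: first marks the initial
update so the engine starts from a fresh context, and the final pass mixes in
the precomputed outer pad via opad/opad_len. A sketch of a final HMAC-SHA256
descriptor (opad_sg and total_len are assumptions, not names from this
header):

	/* Sketch: final HMAC-SHA256 pass using the new fields. */
	struct ccp_sha_engine sha = {
		.type = CCP_SHA_TYPE_256,
		.opad = opad_sg,		/* scatterlist holding K ^ opad */
		.opad_len = SHA256_BLOCK_SIZE,	/* 64 bytes */
		.first = 0,
		.final = 1,
		.msg_bits = total_len * 8,
	};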