Merge git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
diff --git a/arch/arm/mach-omap2/clock2420_data.c b/arch/arm/mach-omap2/clock2420_data.c
index d932b14..1820a55 100644
--- a/arch/arm/mach-omap2/clock2420_data.c
+++ b/arch/arm/mach-omap2/clock2420_data.c
@@ -1836,7 +1836,7 @@
 	CLK(NULL,	"vlynq_ick",	&vlynq_ick,	CK_242X),
 	CLK(NULL,	"vlynq_fck",	&vlynq_fck,	CK_242X),
 	CLK(NULL,	"des_ick",	&des_ick,	CK_242X),
-	CLK(NULL,	"sha_ick",	&sha_ick,	CK_242X),
+	CLK("omap-sham",	"ick",	&sha_ick,	CK_242X),
 	CLK("omap_rng",	"ick",		&rng_ick,	CK_242X),
 	CLK(NULL,	"aes_ick",	&aes_ick,	CK_242X),
 	CLK(NULL,	"pka_ick",	&pka_ick,	CK_242X),
diff --git a/arch/arm/mach-omap2/clock2430_data.c b/arch/arm/mach-omap2/clock2430_data.c
index 0438b6e..5884ac6 100644
--- a/arch/arm/mach-omap2/clock2430_data.c
+++ b/arch/arm/mach-omap2/clock2430_data.c
@@ -1924,7 +1924,7 @@
 	CLK(NULL,	"sdma_ick",	&sdma_ick,	CK_243X),
 	CLK(NULL,	"sdrc_ick",	&sdrc_ick,	CK_243X),
 	CLK(NULL,	"des_ick",	&des_ick,	CK_243X),
-	CLK(NULL,	"sha_ick",	&sha_ick,	CK_243X),
+	CLK("omap-sham",	"ick",	&sha_ick,	CK_243X),
 	CLK("omap_rng",	"ick",		&rng_ick,	CK_243X),
 	CLK(NULL,	"aes_ick",	&aes_ick,	CK_243X),
 	CLK(NULL,	"pka_ick",	&pka_ick,	CK_243X),
diff --git a/arch/arm/mach-omap2/clock3xxx_data.c b/arch/arm/mach-omap2/clock3xxx_data.c
index 9cba556..52638df 100644
--- a/arch/arm/mach-omap2/clock3xxx_data.c
+++ b/arch/arm/mach-omap2/clock3xxx_data.c
@@ -3360,7 +3360,7 @@
 	CLK("mmci-omap-hs.2",	"ick",	&mmchs3_ick,	CK_3430ES2 | CK_AM35XX),
 	CLK(NULL,	"icr_ick",	&icr_ick,	CK_343X),
 	CLK(NULL,	"aes2_ick",	&aes2_ick,	CK_343X),
-	CLK(NULL,	"sha12_ick",	&sha12_ick,	CK_343X),
+	CLK("omap-sham",	"ick",	&sha12_ick,	CK_343X),
 	CLK(NULL,	"des2_ick",	&des2_ick,	CK_343X),
 	CLK("mmci-omap-hs.1",	"ick",	&mmchs2_ick,	CK_3XXX),
 	CLK("mmci-omap-hs.0",	"ick",	&mmchs1_ick,	CK_3XXX),
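For reference, the three clkdev rows above switch the SHA clock from a global
connection id ("sha_ick") to a device + connection id pair, so the new driver
can request its interface clock generically. A minimal sketch of the driver
side (hypothetical helper, not part of this patch):

	#include <linux/clk.h>
	#include <linux/err.h>
	#include <linux/platform_device.h>

	static struct clk *omap_sham_get_ick(struct platform_device *pdev)
	{
		/* matches dev_id "omap-sham", con_id "ick" in the tables above */
		return clk_get(&pdev->dev, "ick");
	}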
diff --git a/arch/arm/mach-omap2/devices.c b/arch/arm/mach-omap2/devices.c
index 2271b9b..beac46c 100644
--- a/arch/arm/mach-omap2/devices.c
+++ b/arch/arm/mach-omap2/devices.c
@@ -26,6 +26,7 @@
 #include <plat/mux.h>
 #include <mach/gpio.h>
 #include <plat/mmc.h>
+#include <plat/dma.h>
 
 #include "mux.h"
 
@@ -453,8 +454,10 @@
 static inline void omap_init_mcspi(void) {}
 #endif
 
-#ifdef CONFIG_OMAP_SHA1_MD5
-static struct resource sha1_md5_resources[] = {
+#if defined(CONFIG_CRYPTO_DEV_OMAP_SHAM) || defined(CONFIG_CRYPTO_DEV_OMAP_SHAM_MODULE)
+
+#ifdef CONFIG_ARCH_OMAP24XX
+static struct resource omap2_sham_resources[] = {
 	{
 		.start	= OMAP24XX_SEC_SHA1MD5_BASE,
 		.end	= OMAP24XX_SEC_SHA1MD5_BASE + 0x64,
@@ -465,20 +468,55 @@
 		.flags	= IORESOURCE_IRQ,
 	}
 };
+static int omap2_sham_resources_sz = ARRAY_SIZE(omap2_sham_resources);
+#else
+#define omap2_sham_resources		NULL
+#define omap2_sham_resources_sz		0
+#endif
 
-static struct platform_device sha1_md5_device = {
-	.name		= "OMAP SHA1/MD5",
+#ifdef CONFIG_ARCH_OMAP34XX
+static struct resource omap3_sham_resources[] = {
+	{
+		.start	= OMAP34XX_SEC_SHA1MD5_BASE,
+		.end	= OMAP34XX_SEC_SHA1MD5_BASE + 0x64,
+		.flags	= IORESOURCE_MEM,
+	},
+	{
+		.start	= INT_34XX_SHA1MD52_IRQ,
+		.flags	= IORESOURCE_IRQ,
+	},
+	{
+		.start	= OMAP34XX_DMA_SHA1MD5_RX,
+		.flags	= IORESOURCE_DMA,
+	}
+};
+static int omap3_sham_resources_sz = ARRAY_SIZE(omap3_sham_resources);
+#else
+#define omap3_sham_resources		NULL
+#define omap3_sham_resources_sz		0
+#endif
+
+static struct platform_device sham_device = {
+	.name		= "omap-sham",
 	.id		= -1,
-	.num_resources	= ARRAY_SIZE(sha1_md5_resources),
-	.resource	= sha1_md5_resources,
 };
 
-static void omap_init_sha1_md5(void)
+static void omap_init_sham(void)
 {
-	platform_device_register(&sha1_md5_device);
+	if (cpu_is_omap24xx()) {
+		sham_device.resource = omap2_sham_resources;
+		sham_device.num_resources = omap2_sham_resources_sz;
+	} else if (cpu_is_omap34xx()) {
+		sham_device.resource = omap3_sham_resources;
+		sham_device.num_resources = omap3_sham_resources_sz;
+	} else {
+		pr_err("%s: platform not supported\n", __func__);
+		return;
+	}
+	platform_device_register(&sham_device);
 }
 #else
-static inline void omap_init_sha1_md5(void) { }
+static inline void omap_init_sham(void) { }
 #endif
 
 /*-------------------------------------------------------------------------*/
@@ -799,7 +837,7 @@
 	omap_init_mcspi();
 	omap_hdq_init();
 	omap_init_sti();
-	omap_init_sha1_md5();
+	omap_init_sham();
 
 	return 0;
 }
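The probe-side counterpart (assumed here, not shown in this patch) only has to
ask for resource 0 and IRQ 0; the per-SoC differences stay entirely in the
omap2/omap3 resource tables registered above:

	#include <linux/platform_device.h>
	#include <linux/ioport.h>

	static int omap_sham_get_resources(struct platform_device *pdev,
					   struct resource **mem, int *irq)
	{
		*mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
		*irq = platform_get_irq(pdev, 0);
		if (!*mem || *irq < 0)
			return -ENODEV;
		return 0;
	}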
diff --git a/arch/arm/plat-omap/include/plat/omap34xx.h b/arch/arm/plat-omap/include/plat/omap34xx.h
index 2845fdc..98fc8b4 100644
--- a/arch/arm/plat-omap/include/plat/omap34xx.h
+++ b/arch/arm/plat-omap/include/plat/omap34xx.h
@@ -82,5 +82,10 @@
 
 #define OMAP34XX_MAILBOX_BASE		(L4_34XX_BASE + 0x94000)
 
+/* Security */
+#define OMAP34XX_SEC_BASE	(L4_34XX_BASE + 0xA0000)
+#define OMAP34XX_SEC_SHA1MD5_BASE	(OMAP34XX_SEC_BASE + 0x23000)
+#define OMAP34XX_SEC_AES_BASE	(OMAP34XX_SEC_BASE + 0x25000)
+
 #endif /* __ASM_ARCH_OMAP3_H */
 
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 20bb0e1a..ff16756 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -32,6 +32,9 @@
 #define IN	IN1
 #define KEY	%xmm2
 #define IV	%xmm3
+#define BSWAP_MASK %xmm10
+#define CTR	%xmm11
+#define INC	%xmm12
 
 #define KEYP	%rdi
 #define OUTP	%rsi
@@ -42,6 +45,7 @@
 #define T1	%r10
 #define TKEYP	T1
 #define T2	%r11
+#define TCTR_LOW T2
 
 _key_expansion_128:
 _key_expansion_256a:
@@ -724,3 +728,114 @@
 	movups IV, (IVP)
 .Lcbc_dec_just_ret:
 	ret
+
+.align 16
+.Lbswap_mask:
+	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+
+/*
+ * _aesni_inc_init:	internal ABI
+ *	setup registers used by _aesni_inc
+ * input:
+ *	IV
+ * output:
+ *	CTR:	== IV, in little endian
+ *	TCTR_LOW: == lower qword of CTR
+ *	INC:	== 1, in little endian
+ *	BSWAP_MASK == endian swapping mask
+ */
+_aesni_inc_init:
+	movaps .Lbswap_mask, BSWAP_MASK
+	movaps IV, CTR
+	PSHUFB_XMM BSWAP_MASK CTR
+	mov $1, TCTR_LOW
+	MOVQ_R64_XMM TCTR_LOW INC
+	MOVQ_R64_XMM CTR TCTR_LOW
+	ret
+
+/*
+ * _aesni_inc:		internal ABI
+ *	Increase IV by 1, IV is in big endian
+ * input:
+ *	IV
+ *	CTR:	== IV, in little endian
+ *	TCTR_LOW: == lower qword of CTR
+ *	INC:	== 1, in little endian
+ *	BSWAP_MASK == endian swapping mask
+ * output:
+ *	IV:	Increased by 1
+ * changed:
+ *	CTR:	== output IV, in little endian
+ *	TCTR_LOW: == lower qword of CTR
+ */
+_aesni_inc:
+	paddq INC, CTR
+	add $1, TCTR_LOW
+	jnc .Linc_low
+	pslldq $8, INC
+	paddq INC, CTR
+	psrldq $8, INC
+.Linc_low:
+	movaps CTR, IV
+	PSHUFB_XMM BSWAP_MASK IV
+	ret
+
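The net effect of _aesni_inc is a 128-bit big-endian increment of the IV; the
asm keeps a byte-swapped copy in CTR so it can use paddq, and only takes the
extra add when the low qword wraps. A portable C sketch of the same operation:

	#include <stdint.h>
	#include <stddef.h>

	static void ctr128_inc(uint8_t iv[16])
	{
		size_t i = 16;

		while (i-- > 0)
			if (++iv[i] != 0)	/* stop once a byte does not wrap */
				break;
	}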
+/*
+ * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
+ *		      unsigned int len, u8 *iv)
+ */
+ENTRY(aesni_ctr_enc)
+	cmp $16, LEN
+	jb .Lctr_enc_just_ret
+	mov 480(KEYP), KLEN
+	movups (IVP), IV
+	call _aesni_inc_init
+	cmp $64, LEN
+	jb .Lctr_enc_loop1
+.align 4
+.Lctr_enc_loop4:
+	movaps IV, STATE1
+	call _aesni_inc
+	movups (INP), IN1
+	movaps IV, STATE2
+	call _aesni_inc
+	movups 0x10(INP), IN2
+	movaps IV, STATE3
+	call _aesni_inc
+	movups 0x20(INP), IN3
+	movaps IV, STATE4
+	call _aesni_inc
+	movups 0x30(INP), IN4
+	call _aesni_enc4
+	pxor IN1, STATE1
+	movups STATE1, (OUTP)
+	pxor IN2, STATE2
+	movups STATE2, 0x10(OUTP)
+	pxor IN3, STATE3
+	movups STATE3, 0x20(OUTP)
+	pxor IN4, STATE4
+	movups STATE4, 0x30(OUTP)
+	sub $64, LEN
+	add $64, INP
+	add $64, OUTP
+	cmp $64, LEN
+	jge .Lctr_enc_loop4
+	cmp $16, LEN
+	jb .Lctr_enc_ret
+.align 4
+.Lctr_enc_loop1:
+	movaps IV, STATE
+	call _aesni_inc
+	movups (INP), IN
+	call _aesni_enc1
+	pxor IN, STATE
+	movups STATE, (OUTP)
+	sub $16, LEN
+	add $16, INP
+	add $16, OUTP
+	cmp $16, LEN
+	jge .Lctr_enc_loop1
+.Lctr_enc_ret:
+	movups IV, (IVP)
+.Lctr_enc_just_ret:
+	ret
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 49c552c..2cb3dcc 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -18,6 +18,7 @@
 #include <crypto/algapi.h>
 #include <crypto/aes.h>
 #include <crypto/cryptd.h>
+#include <crypto/ctr.h>
 #include <asm/i387.h>
 #include <asm/aes.h>
 
@@ -58,6 +59,8 @@
 			      const u8 *in, unsigned int len, u8 *iv);
 asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
 			      const u8 *in, unsigned int len, u8 *iv);
+asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
+			      const u8 *in, unsigned int len, u8 *iv);
 
 static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx)
 {
@@ -321,6 +324,72 @@
 	},
 };
 
+static void ctr_crypt_final(struct crypto_aes_ctx *ctx,
+			    struct blkcipher_walk *walk)
+{
+	u8 *ctrblk = walk->iv;
+	u8 keystream[AES_BLOCK_SIZE];
+	u8 *src = walk->src.virt.addr;
+	u8 *dst = walk->dst.virt.addr;
+	unsigned int nbytes = walk->nbytes;
+
+	aesni_enc(ctx, keystream, ctrblk);
+	crypto_xor(keystream, src, nbytes);
+	memcpy(dst, keystream, nbytes);
+	crypto_inc(ctrblk, AES_BLOCK_SIZE);
+}
+
+static int ctr_crypt(struct blkcipher_desc *desc,
+		     struct scatterlist *dst, struct scatterlist *src,
+		     unsigned int nbytes)
+{
+	struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	kernel_fpu_begin();
+	while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
+		aesni_ctr_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
+			      nbytes & AES_BLOCK_MASK, walk.iv);
+		nbytes &= AES_BLOCK_SIZE - 1;
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+	if (walk.nbytes) {
+		ctr_crypt_final(ctx, &walk);
+		err = blkcipher_walk_done(desc, &walk, 0);
+	}
+	kernel_fpu_end();
+
+	return err;
+}
+
+static struct crypto_alg blk_ctr_alg = {
+	.cra_name		= "__ctr-aes-aesni",
+	.cra_driver_name	= "__driver-ctr-aes-aesni",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1,
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(blk_ctr_alg.cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= AES_MIN_KEY_SIZE,
+			.max_keysize	= AES_MAX_KEY_SIZE,
+			.ivsize		= AES_BLOCK_SIZE,
+			.setkey		= aes_set_key,
+			.encrypt	= ctr_crypt,
+			.decrypt	= ctr_crypt,
+		},
+	},
+};
+
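ctr_crypt() above splits each walk into a bulk part for the asm loops and a
sub-block tail for ctr_crypt_final(). A standalone sketch of that split,
assuming the glue code's usual definition of AES_BLOCK_MASK:

	#include <stdio.h>

	#define AES_BLOCK_SIZE	16
	#define AES_BLOCK_MASK	(~(AES_BLOCK_SIZE - 1))	/* assumption */

	int main(void)
	{
		unsigned int nbytes = 70;

		/* 64 bytes go through the asm loops, 6 are XORed in C */
		printf("bulk=%u tail=%u\n", nbytes & AES_BLOCK_MASK,
		       nbytes & (AES_BLOCK_SIZE - 1));
		return 0;
	}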
 static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
 			unsigned int key_len)
 {
@@ -467,13 +536,11 @@
 	},
 };
 
-#ifdef HAS_CTR
 static int ablk_ctr_init(struct crypto_tfm *tfm)
 {
 	struct cryptd_ablkcipher *cryptd_tfm;
 
-	cryptd_tfm = cryptd_alloc_ablkcipher("fpu(ctr(__driver-aes-aesni))",
-					     0, 0);
+	cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ctr-aes-aesni", 0, 0);
 	if (IS_ERR(cryptd_tfm))
 		return PTR_ERR(cryptd_tfm);
 	ablk_init_common(tfm, cryptd_tfm);
@@ -500,11 +567,50 @@
 			.ivsize		= AES_BLOCK_SIZE,
 			.setkey		= ablk_set_key,
 			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
+			.decrypt	= ablk_encrypt,
 			.geniv		= "chainiv",
 		},
 	},
 };
+
+#ifdef HAS_CTR
+static int ablk_rfc3686_ctr_init(struct crypto_tfm *tfm)
+{
+	struct cryptd_ablkcipher *cryptd_tfm;
+
+	cryptd_tfm = cryptd_alloc_ablkcipher(
+		"rfc3686(__driver-ctr-aes-aesni)", 0, 0);
+	if (IS_ERR(cryptd_tfm))
+		return PTR_ERR(cryptd_tfm);
+	ablk_init_common(tfm, cryptd_tfm);
+	return 0;
+}
+
+static struct crypto_alg ablk_rfc3686_ctr_alg = {
+	.cra_name		= "rfc3686(ctr(aes))",
+	.cra_driver_name	= "rfc3686-ctr-aes-aesni",
+	.cra_priority		= 400,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(ablk_rfc3686_ctr_alg.cra_list),
+	.cra_init		= ablk_rfc3686_ctr_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize = AES_MIN_KEY_SIZE+CTR_RFC3686_NONCE_SIZE,
+			.max_keysize = AES_MAX_KEY_SIZE+CTR_RFC3686_NONCE_SIZE,
+			.ivsize	     = CTR_RFC3686_IV_SIZE,
+			.setkey	     = ablk_set_key,
+			.encrypt     = ablk_encrypt,
+			.decrypt     = ablk_decrypt,
+			.geniv	     = "seqiv",
+		},
+	},
+};
 #endif
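The rfc3686 template wraps the raw CTR implementation: the last
CTR_RFC3686_NONCE_SIZE bytes of the key become a per-key nonce (hence the
enlarged min/max keysizes above), and each request builds its initial counter
block from nonce, IV and a one-based big-endian counter. A sketch of that
layout (values hypothetical):

	#include <stdint.h>
	#include <string.h>

	#define CTR_RFC3686_NONCE_SIZE	4
	#define CTR_RFC3686_IV_SIZE	8

	static void rfc3686_ctrblk(uint8_t blk[16],
				   const uint8_t nonce[CTR_RFC3686_NONCE_SIZE],
				   const uint8_t iv[CTR_RFC3686_IV_SIZE])
	{
		memcpy(blk, nonce, CTR_RFC3686_NONCE_SIZE);	/* from the key tail */
		memcpy(blk + 4, iv, CTR_RFC3686_IV_SIZE);	/* per-request IV */
		blk[12] = 0;
		blk[13] = 0;
		blk[14] = 0;
		blk[15] = 1;					/* BE32 counter = 1 */
	}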
 
 #ifdef HAS_LRW
@@ -640,13 +746,17 @@
 		goto blk_ecb_err;
 	if ((err = crypto_register_alg(&blk_cbc_alg)))
 		goto blk_cbc_err;
+	if ((err = crypto_register_alg(&blk_ctr_alg)))
+		goto blk_ctr_err;
 	if ((err = crypto_register_alg(&ablk_ecb_alg)))
 		goto ablk_ecb_err;
 	if ((err = crypto_register_alg(&ablk_cbc_alg)))
 		goto ablk_cbc_err;
-#ifdef HAS_CTR
 	if ((err = crypto_register_alg(&ablk_ctr_alg)))
 		goto ablk_ctr_err;
+#ifdef HAS_CTR
+	if ((err = crypto_register_alg(&ablk_rfc3686_ctr_alg)))
+		goto ablk_rfc3686_ctr_err;
 #endif
 #ifdef HAS_LRW
 	if ((err = crypto_register_alg(&ablk_lrw_alg)))
@@ -675,13 +785,17 @@
 ablk_lrw_err:
 #endif
 #ifdef HAS_CTR
+	crypto_unregister_alg(&ablk_rfc3686_ctr_alg);
+ablk_rfc3686_ctr_err:
+#endif
 	crypto_unregister_alg(&ablk_ctr_alg);
 ablk_ctr_err:
-#endif
 	crypto_unregister_alg(&ablk_cbc_alg);
 ablk_cbc_err:
 	crypto_unregister_alg(&ablk_ecb_alg);
 ablk_ecb_err:
+	crypto_unregister_alg(&blk_ctr_alg);
+blk_ctr_err:
 	crypto_unregister_alg(&blk_cbc_alg);
 blk_cbc_err:
 	crypto_unregister_alg(&blk_ecb_alg);
@@ -705,10 +819,12 @@
 	crypto_unregister_alg(&ablk_lrw_alg);
 #endif
 #ifdef HAS_CTR
-	crypto_unregister_alg(&ablk_ctr_alg);
+	crypto_unregister_alg(&ablk_rfc3686_ctr_alg);
 #endif
+	crypto_unregister_alg(&ablk_ctr_alg);
 	crypto_unregister_alg(&ablk_cbc_alg);
 	crypto_unregister_alg(&ablk_ecb_alg);
+	crypto_unregister_alg(&blk_ctr_alg);
 	crypto_unregister_alg(&blk_cbc_alg);
 	crypto_unregister_alg(&blk_ecb_alg);
 	crypto_unregister_alg(&__aesni_alg);
diff --git a/arch/x86/include/asm/inst.h b/arch/x86/include/asm/inst.h
index 14cf526..280bf7f 100644
--- a/arch/x86/include/asm/inst.h
+++ b/arch/x86/include/asm/inst.h
@@ -7,7 +7,66 @@
 
 #ifdef __ASSEMBLY__
 
+#define REG_NUM_INVALID		100
+
+#define REG_TYPE_R64		0
+#define REG_TYPE_XMM		1
+#define REG_TYPE_INVALID	100
+
+	.macro R64_NUM opd r64
+	\opd = REG_NUM_INVALID
+	.ifc \r64,%rax
+	\opd = 0
+	.endif
+	.ifc \r64,%rcx
+	\opd = 1
+	.endif
+	.ifc \r64,%rdx
+	\opd = 2
+	.endif
+	.ifc \r64,%rbx
+	\opd = 3
+	.endif
+	.ifc \r64,%rsp
+	\opd = 4
+	.endif
+	.ifc \r64,%rbp
+	\opd = 5
+	.endif
+	.ifc \r64,%rsi
+	\opd = 6
+	.endif
+	.ifc \r64,%rdi
+	\opd = 7
+	.endif
+	.ifc \r64,%r8
+	\opd = 8
+	.endif
+	.ifc \r64,%r9
+	\opd = 9
+	.endif
+	.ifc \r64,%r10
+	\opd = 10
+	.endif
+	.ifc \r64,%r11
+	\opd = 11
+	.endif
+	.ifc \r64,%r12
+	\opd = 12
+	.endif
+	.ifc \r64,%r13
+	\opd = 13
+	.endif
+	.ifc \r64,%r14
+	\opd = 14
+	.endif
+	.ifc \r64,%r15
+	\opd = 15
+	.endif
+	.endm
+
 	.macro XMM_NUM opd xmm
+	\opd = REG_NUM_INVALID
 	.ifc \xmm,%xmm0
 	\opd = 0
 	.endif
@@ -58,13 +117,25 @@
 	.endif
 	.endm
 
+	.macro REG_TYPE type reg
+	R64_NUM reg_type_r64 \reg
+	XMM_NUM reg_type_xmm \reg
+	.if reg_type_r64 <> REG_NUM_INVALID
+	\type = REG_TYPE_R64
+	.elseif reg_type_xmm <> REG_NUM_INVALID
+	\type = REG_TYPE_XMM
+	.else
+	\type = REG_TYPE_INVALID
+	.endif
+	.endm
+
 	.macro PFX_OPD_SIZE
 	.byte 0x66
 	.endm
 
-	.macro PFX_REX opd1 opd2
-	.if (\opd1 | \opd2) & 8
-	.byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1)
+	.macro PFX_REX opd1 opd2 W=0
+	.if ((\opd1 | \opd2) & 8) || \W
+	.byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1) | (\W << 3)
 	.endif
 	.endm
 
@@ -145,6 +216,25 @@
 	.byte 0x0f, 0x38, 0xdf
 	MODRM 0xc0 aesdeclast_opd1 aesdeclast_opd2
 	.endm
+
+	.macro MOVQ_R64_XMM opd1 opd2
+	REG_TYPE movq_r64_xmm_opd1_type \opd1
+	.if movq_r64_xmm_opd1_type == REG_TYPE_XMM
+	XMM_NUM movq_r64_xmm_opd1 \opd1
+	R64_NUM movq_r64_xmm_opd2 \opd2
+	.else
+	R64_NUM movq_r64_xmm_opd1 \opd1
+	XMM_NUM movq_r64_xmm_opd2 \opd2
+	.endif
+	PFX_OPD_SIZE
+	PFX_REX movq_r64_xmm_opd1 movq_r64_xmm_opd2 1
+	.if movq_r64_xmm_opd1_type == REG_TYPE_XMM
+	.byte 0x0f, 0x7e
+	.else
+	.byte 0x0f, 0x6e
+	.endif
+	MODRM 0xc0 movq_r64_xmm_opd1 movq_r64_xmm_opd2
+	.endm
 #endif
 
 #endif
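The PFX_REX extension can be checked by hand: opd1 supplies REX.B, opd2
supplies REX.R, and the new W argument forces a 64-bit operand size for the
movq forms. A C rendering of the macro, for illustration only:

	#include <stdio.h>

	static unsigned int pfx_rex(unsigned int opd1, unsigned int opd2,
				    unsigned int w)
	{
		return 0x40 | ((opd1 & 8) >> 3) | ((opd2 & 8) >> 1) | (w << 3);
	}

	int main(void)
	{
		/* MOVQ_R64_XMM %r11 %xmm12: opd1 = 11 (r64), opd2 = 12 (xmm) */
		printf("REX = 0x%02x\n", pfx_rex(11, 12, 1));	/* 0x4d */
		return 0;
	}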
diff --git a/crypto/algapi.c b/crypto/algapi.c
index 76fae27..c3cf1a6 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -544,7 +544,7 @@
 {
 	int err = -EINVAL;
 
-	if (frontend && (alg->cra_flags ^ frontend->type) & frontend->maskset)
+	if ((alg->cra_flags ^ frontend->type) & frontend->maskset)
 		goto out;
 
 	spawn->frontend = frontend;
diff --git a/crypto/internal.h b/crypto/internal.h
index 2d22636..d4384b0 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -6,7 +6,7 @@
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option) 
+ * Software Foundation; either version 2 of the License, or (at your option)
  * any later version.
  *
  */
diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c
index 8020124..247178c 100644
--- a/crypto/pcrypt.c
+++ b/crypto/pcrypt.c
@@ -315,16 +315,13 @@
 	goto out;
 }
 
-static struct crypto_instance *pcrypt_alloc_aead(struct rtattr **tb)
+static struct crypto_instance *pcrypt_alloc_aead(struct rtattr **tb,
+						 u32 type, u32 mask)
 {
 	struct crypto_instance *inst;
 	struct crypto_alg *alg;
-	struct crypto_attr_type *algt;
 
-	algt = crypto_get_attr_type(tb);
-
-	alg = crypto_get_attr_alg(tb, algt->type,
-				  (algt->mask & CRYPTO_ALG_TYPE_MASK));
+	alg = crypto_get_attr_alg(tb, type, (mask & CRYPTO_ALG_TYPE_MASK));
 	if (IS_ERR(alg))
 		return ERR_CAST(alg);
 
@@ -365,7 +362,7 @@
 
 	switch (algt->type & algt->mask & CRYPTO_ALG_TYPE_MASK) {
 	case CRYPTO_ALG_TYPE_AEAD:
-		return pcrypt_alloc_aead(tb);
+		return pcrypt_alloc_aead(tb, algt->type, algt->mask);
 	}
 
 	return ERR_PTR(-EINVAL);
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index a351599..ea610ad 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -437,6 +437,9 @@
 			goto out;
 		}
 
+		if (speed[i].klen)
+			crypto_hash_setkey(tfm, tvmem[0], speed[i].klen);
+
 		printk(KERN_INFO "test%3u "
 		       "(%5u byte blocks,%5u bytes per update,%4u updates): ",
 		       i, speed[i].blen, speed[i].plen, speed[i].blen / speed[i].plen);
@@ -881,6 +884,10 @@
 		test_hash_speed("rmd320", sec, generic_hash_speed_template);
 		if (mode > 300 && mode < 400) break;
 
+	case 318:
+		test_hash_speed("ghash-generic", sec, hash_speed_template_16);
+		if (mode > 300 && mode < 400) break;
+
 	case 399:
 		break;
 
diff --git a/crypto/tcrypt.h b/crypto/tcrypt.h
index 966bbfa..10cb925 100644
--- a/crypto/tcrypt.h
+++ b/crypto/tcrypt.h
@@ -25,6 +25,7 @@
 struct hash_speed {
 	unsigned int blen;	/* buffer length */
 	unsigned int plen;	/* per-update length */
+	unsigned int klen;	/* key length */
 };
 
 /*
@@ -83,4 +84,32 @@
 	{  .blen = 0,	.plen = 0, }
 };
 
+static struct hash_speed hash_speed_template_16[] = {
+	{ .blen = 16,	.plen = 16,	.klen = 16, },
+	{ .blen = 64,	.plen = 16,	.klen = 16, },
+	{ .blen = 64,	.plen = 64,	.klen = 16, },
+	{ .blen = 256,	.plen = 16,	.klen = 16, },
+	{ .blen = 256,	.plen = 64,	.klen = 16, },
+	{ .blen = 256,	.plen = 256,	.klen = 16, },
+	{ .blen = 1024,	.plen = 16,	.klen = 16, },
+	{ .blen = 1024,	.plen = 256,	.klen = 16, },
+	{ .blen = 1024,	.plen = 1024,	.klen = 16, },
+	{ .blen = 2048,	.plen = 16,	.klen = 16, },
+	{ .blen = 2048,	.plen = 256,	.klen = 16, },
+	{ .blen = 2048,	.plen = 1024,	.klen = 16, },
+	{ .blen = 2048,	.plen = 2048,	.klen = 16, },
+	{ .blen = 4096,	.plen = 16,	.klen = 16, },
+	{ .blen = 4096,	.plen = 256,	.klen = 16, },
+	{ .blen = 4096,	.plen = 1024,	.klen = 16, },
+	{ .blen = 4096,	.plen = 4096,	.klen = 16, },
+	{ .blen = 8192,	.plen = 16,	.klen = 16, },
+	{ .blen = 8192,	.plen = 256,	.klen = 16, },
+	{ .blen = 8192,	.plen = 1024,	.klen = 16, },
+	{ .blen = 8192,	.plen = 4096,	.klen = 16, },
+	{ .blen = 8192,	.plen = 8192,	.klen = 16, },
+
+	/* End marker */
+	{  .blen = 0,	.plen = 0,	.klen = 0, }
+};
+
 #endif	/* _CRYPTO_TCRYPT_H */
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index fb76517..74e3537 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -1669,17 +1669,73 @@
 	}
 };
 
-#define VMAC_AES_TEST_VECTORS	1
-static char vmac_string[128] = {'\x01', '\x01', '\x01', '\x01',
+#define VMAC_AES_TEST_VECTORS	8
+static char vmac_string1[128] = {'\x01', '\x01', '\x01', '\x01',
 				'\x02', '\x03', '\x02', '\x02',
 				'\x02', '\x04', '\x01', '\x07',
 				'\x04', '\x01', '\x04', '\x03',};
+static char vmac_string2[128] = {'a', 'b', 'c',};
+static char vmac_string3[128] = {'a', 'b', 'c', 'a', 'b', 'c',
+				'a', 'b', 'c', 'a', 'b', 'c',
+				'a', 'b', 'c', 'a', 'b', 'c',
+				'a', 'b', 'c', 'a', 'b', 'c',
+				'a', 'b', 'c', 'a', 'b', 'c',
+				'a', 'b', 'c', 'a', 'b', 'c',
+				'a', 'b', 'c', 'a', 'b', 'c',
+				'a', 'b', 'c', 'a', 'b', 'c',
+				};
+
 static struct hash_testvec aes_vmac128_tv_template[] = {
 	{
+		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+		.plaintext = NULL,
+		.digest	= "\x07\x58\x80\x35\x77\xa4\x7b\x54",
+		.psize	= 0,
+		.ksize	= 16,
+	}, {
 		.key    = "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
-		.plaintext = vmac_string,
-		.digest = "\xcb\xd7\x8a\xfd\xb7\x33\x79\xe7",
+		.plaintext = vmac_string1,
+		.digest = "\xce\xf5\x3c\xd3\xae\x68\x8c\xa1",
+		.psize  = 128,
+		.ksize  = 16,
+	}, {
+		.key    = "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+		.plaintext = vmac_string2,
+		.digest = "\xc9\x27\xb0\x73\x81\xbd\x14\x2d",
+		.psize  = 128,
+		.ksize  = 16,
+	}, {
+		.key    = "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+		.plaintext = vmac_string3,
+		.digest = "\x8d\x1a\x95\x8c\x98\x47\x0b\x19",
+		.psize  = 128,
+		.ksize  = 16,
+	}, {
+		.key	= "abcdefghijklmnop",
+		.plaintext = NULL,
+		.digest	= "\x3b\x89\xa1\x26\x9e\x55\x8f\x84",
+		.psize	= 0,
+		.ksize	= 16,
+	}, {
+		.key    = "abcdefghijklmnop",
+		.plaintext = vmac_string1,
+		.digest = "\xab\x5e\xab\xb0\xf6\x8d\x74\xc2",
+		.psize  = 128,
+		.ksize  = 16,
+	}, {
+		.key    = "abcdefghijklmnop",
+		.plaintext = vmac_string2,
+		.digest = "\x11\x15\x68\x42\x3d\x7b\x09\xdf",
+		.psize  = 128,
+		.ksize  = 16,
+	}, {
+		.key    = "abcdefghijklmnop",
+		.plaintext = vmac_string3,
+		.digest = "\x8b\x32\x8f\xe1\xed\x8f\xfa\xd4",
 		.psize  = 128,
 		.ksize  = 16,
 	},
diff --git a/crypto/vmac.c b/crypto/vmac.c
index 0a9468e..0999274 100644
--- a/crypto/vmac.c
+++ b/crypto/vmac.c
@@ -43,6 +43,8 @@
 const u64 m64   = UINT64_C(0xffffffffffffffff);  /* 64-bit mask       */
 const u64 mpoly = UINT64_C(0x1fffffff1fffffff);  /* Poly key mask     */
 
+#define pe64_to_cpup le64_to_cpup		/* Prefer little endian */
+
 #ifdef __LITTLE_ENDIAN
 #define INDEX_HIGH 1
 #define INDEX_LOW 0
@@ -110,8 +112,8 @@
 		int i; u64 th, tl;					\
 		rh = rl = 0;						\
 		for (i = 0; i < nw; i += 2) {				\
-			MUL64(th, tl, le64_to_cpup((mp)+i)+(kp)[i],	\
-				le64_to_cpup((mp)+i+1)+(kp)[i+1]);	\
+			MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i],	\
+				pe64_to_cpup((mp)+i+1)+(kp)[i+1]);	\
 			ADD128(rh, rl, th, tl);				\
 		}							\
 	} while (0)
@@ -121,11 +123,11 @@
 		int i; u64 th, tl;					\
 		rh1 = rl1 = rh = rl = 0;				\
 		for (i = 0; i < nw; i += 2) {				\
-			MUL64(th, tl, le64_to_cpup((mp)+i)+(kp)[i],	\
-				le64_to_cpup((mp)+i+1)+(kp)[i+1]);	\
+			MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i],	\
+				pe64_to_cpup((mp)+i+1)+(kp)[i+1]);	\
 			ADD128(rh, rl, th, tl);				\
-			MUL64(th, tl, le64_to_cpup((mp)+i)+(kp)[i+2],	\
-				le64_to_cpup((mp)+i+1)+(kp)[i+3]);	\
+			MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i+2],	\
+				pe64_to_cpup((mp)+i+1)+(kp)[i+3]);	\
 			ADD128(rh1, rl1, th, tl);			\
 		}							\
 	} while (0)
@@ -136,17 +138,17 @@
 		int i; u64 th, tl;					\
 		rh = rl = 0;						\
 		for (i = 0; i < nw; i += 8) {				\
-			MUL64(th, tl, le64_to_cpup((mp)+i)+(kp)[i],	\
-				le64_to_cpup((mp)+i+1)+(kp)[i+1]);	\
+			MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i],	\
+				pe64_to_cpup((mp)+i+1)+(kp)[i+1]);	\
 			ADD128(rh, rl, th, tl);				\
-			MUL64(th, tl, le64_to_cpup((mp)+i+2)+(kp)[i+2],	\
-				le64_to_cpup((mp)+i+3)+(kp)[i+3]);	\
+			MUL64(th, tl, pe64_to_cpup((mp)+i+2)+(kp)[i+2],	\
+				pe64_to_cpup((mp)+i+3)+(kp)[i+3]);	\
 			ADD128(rh, rl, th, tl);				\
-			MUL64(th, tl, le64_to_cpup((mp)+i+4)+(kp)[i+4],	\
-				le64_to_cpup((mp)+i+5)+(kp)[i+5]);	\
+			MUL64(th, tl, pe64_to_cpup((mp)+i+4)+(kp)[i+4],	\
+				pe64_to_cpup((mp)+i+5)+(kp)[i+5]);	\
 			ADD128(rh, rl, th, tl);				\
-			MUL64(th, tl, le64_to_cpup((mp)+i+6)+(kp)[i+6],	\
-				le64_to_cpup((mp)+i+7)+(kp)[i+7]);	\
+			MUL64(th, tl, pe64_to_cpup((mp)+i+6)+(kp)[i+6],	\
+				pe64_to_cpup((mp)+i+7)+(kp)[i+7]);	\
 			ADD128(rh, rl, th, tl);				\
 		}							\
 	} while (0)
@@ -156,29 +158,29 @@
 		int i; u64 th, tl;					\
 		rh1 = rl1 = rh = rl = 0;				\
 		for (i = 0; i < nw; i += 8) {				\
-			MUL64(th, tl, le64_to_cpup((mp)+i)+(kp)[i],	\
-				le64_to_cpup((mp)+i+1)+(kp)[i+1]);	\
+			MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i],	\
+				pe64_to_cpup((mp)+i+1)+(kp)[i+1]);	\
 			ADD128(rh, rl, th, tl);				\
-			MUL64(th, tl, le64_to_cpup((mp)+i)+(kp)[i+2],	\
-				le64_to_cpup((mp)+i+1)+(kp)[i+3]);	\
+			MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i+2],	\
+				pe64_to_cpup((mp)+i+1)+(kp)[i+3]);	\
 			ADD128(rh1, rl1, th, tl);			\
-			MUL64(th, tl, le64_to_cpup((mp)+i+2)+(kp)[i+2],	\
-				le64_to_cpup((mp)+i+3)+(kp)[i+3]);	\
+			MUL64(th, tl, pe64_to_cpup((mp)+i+2)+(kp)[i+2],	\
+				pe64_to_cpup((mp)+i+3)+(kp)[i+3]);	\
 			ADD128(rh, rl, th, tl);				\
-			MUL64(th, tl, le64_to_cpup((mp)+i+2)+(kp)[i+4],	\
-				le64_to_cpup((mp)+i+3)+(kp)[i+5]);	\
+			MUL64(th, tl, pe64_to_cpup((mp)+i+2)+(kp)[i+4],	\
+				pe64_to_cpup((mp)+i+3)+(kp)[i+5]);	\
 			ADD128(rh1, rl1, th, tl);			\
-			MUL64(th, tl, le64_to_cpup((mp)+i+4)+(kp)[i+4],	\
-				le64_to_cpup((mp)+i+5)+(kp)[i+5]);	\
+			MUL64(th, tl, pe64_to_cpup((mp)+i+4)+(kp)[i+4],	\
+				pe64_to_cpup((mp)+i+5)+(kp)[i+5]);	\
 			ADD128(rh, rl, th, tl);				\
-			MUL64(th, tl, le64_to_cpup((mp)+i+4)+(kp)[i+6],	\
-				le64_to_cpup((mp)+i+5)+(kp)[i+7]);	\
+			MUL64(th, tl, pe64_to_cpup((mp)+i+4)+(kp)[i+6],	\
+				pe64_to_cpup((mp)+i+5)+(kp)[i+7]);	\
 			ADD128(rh1, rl1, th, tl);			\
-			MUL64(th, tl, le64_to_cpup((mp)+i+6)+(kp)[i+6],	\
-				le64_to_cpup((mp)+i+7)+(kp)[i+7]);	\
+			MUL64(th, tl, pe64_to_cpup((mp)+i+6)+(kp)[i+6],	\
+				pe64_to_cpup((mp)+i+7)+(kp)[i+7]);	\
 			ADD128(rh, rl, th, tl);				\
-			MUL64(th, tl, le64_to_cpup((mp)+i+6)+(kp)[i+8],	\
-				le64_to_cpup((mp)+i+7)+(kp)[i+9]);	\
+			MUL64(th, tl, pe64_to_cpup((mp)+i+6)+(kp)[i+8],	\
+				pe64_to_cpup((mp)+i+7)+(kp)[i+9]);	\
 			ADD128(rh1, rl1, th, tl);			\
 		}							\
 	} while (0)
@@ -216,8 +218,8 @@
 		int i;							\
 		rh = rl = t = 0;					\
 		for (i = 0; i < nw; i += 2)  {				\
-			t1 = le64_to_cpup(mp+i) + kp[i];		\
-			t2 = le64_to_cpup(mp+i+1) + kp[i+1];		\
+			t1 = pe64_to_cpup(mp+i) + kp[i];		\
+			t2 = pe64_to_cpup(mp+i+1) + kp[i+1];		\
 			m2 = MUL32(t1 >> 32, t2);			\
 			m1 = MUL32(t1, t2 >> 32);			\
 			ADD128(rh, rl, MUL32(t1 >> 32, t2 >> 32),	\
@@ -322,8 +324,7 @@
 	ctx->first_block_processed = 0;
 }
 
-static u64 l3hash(u64 p1, u64 p2,
-			u64 k1, u64 k2, u64 len)
+static u64 l3hash(u64 p1, u64 p2, u64 k1, u64 k2, u64 len)
 {
 	u64 rh, rl, t, z = 0;
 
@@ -474,7 +475,7 @@
 	}
 	p = be64_to_cpup(out_p + i);
 	h = vhash(m, mbytes, (u64 *)0, &ctx->__vmac_ctx);
-	return p + h;
+	return le64_to_cpu(p + h);
 }
 
 static int vmac_set_key(unsigned char user_key[], struct vmac_ctx_t *ctx)
@@ -549,10 +550,6 @@
 
 static int vmac_init(struct shash_desc *pdesc)
 {
-	struct crypto_shash *parent = pdesc->tfm;
-	struct vmac_ctx_t *ctx = crypto_shash_ctx(parent);
-
-	memset(&ctx->__vmac_ctx, 0, sizeof(struct vmac_ctx));
 	return 0;
 }
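The le64_to_cpu() conversion added to vmac() above makes the tag byte order
host independent: p + h is a 64-bit number, and callers memcpy() the returned
value out, so it is adjusted such that its in-memory bytes are always the
little-endian encoding (a no-op on LE hosts, a swap on BE). A sketch of the
intended serialization:

	#include <stdint.h>

	static void tag_to_le_bytes(uint64_t tag, uint8_t out[8])
	{
		int i;

		for (i = 0; i < 8; i++)
			out[i] = (uint8_t)(tag >> (8 * i));	/* LSB first */
	}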
 
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index b08403d..9073aa0 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -222,4 +222,13 @@
 	help
 	  This option allows you to have support for AMCC crypto acceleration.
 
+config CRYPTO_DEV_OMAP_SHAM
+	tristate "Support for OMAP SHA1/MD5 hw accelerator"
+	depends on ARCH_OMAP2 || ARCH_OMAP3
+	select CRYPTO_SHA1
+	select CRYPTO_MD5
+	help
+	  OMAP processors have a SHA1/MD5 hw accelerator. Select this if you
+	  want to use the OMAP module for SHA1/MD5 algorithms.
+
 endif # CRYPTO_HW
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index 6ffcb3f..c9494e1 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -6,3 +6,4 @@
 obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o
 obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o
 obj-$(CONFIG_CRYPTO_DEV_PPC4XX) += amcc/
+obj-$(CONFIG_CRYPTO_DEV_OMAP_SHAM) += omap-sham.o
diff --git a/drivers/crypto/geode-aes.c b/drivers/crypto/geode-aes.c
index c7a5a43..09389dd 100644
--- a/drivers/crypto/geode-aes.c
+++ b/drivers/crypto/geode-aes.c
@@ -15,14 +15,14 @@
 #include <crypto/algapi.h>
 #include <crypto/aes.h>
 
-#include <asm/io.h>
-#include <asm/delay.h>
+#include <linux/io.h>
+#include <linux/delay.h>
 
 #include "geode-aes.h"
 
 /* Static structures */
 
-static void __iomem * _iobase;
+static void __iomem *_iobase;
 static spinlock_t lock;
 
 /* Write a 128 bit field (either a writable key or IV) */
@@ -30,7 +30,7 @@
 _writefield(u32 offset, void *value)
 {
 	int i;
-	for(i = 0; i < 4; i++)
+	for (i = 0; i < 4; i++)
 		iowrite32(((u32 *) value)[i], _iobase + offset + (i * 4));
 }
 
@@ -39,7 +39,7 @@
 _readfield(u32 offset, void *value)
 {
 	int i;
-	for(i = 0; i < 4; i++)
+	for (i = 0; i < 4; i++)
 		((u32 *) value)[i] = ioread32(_iobase + offset + (i * 4));
 }
 
@@ -59,7 +59,7 @@
 	do {
 		status = ioread32(_iobase + AES_INTR_REG);
 		cpu_relax();
-	} while(!(status & AES_INTRA_PENDING) && --counter);
+	} while (!(status & AES_INTRA_PENDING) && --counter);
 
 	/* Clear the event */
 	iowrite32((status & 0xFF) | AES_INTRA_PENDING, _iobase + AES_INTR_REG);
@@ -317,7 +317,7 @@
 	err = blkcipher_walk_virt(desc, &walk);
 	op->iv = walk.iv;
 
-	while((nbytes = walk.nbytes)) {
+	while ((nbytes = walk.nbytes)) {
 		op->src = walk.src.virt.addr,
 		op->dst = walk.dst.virt.addr;
 		op->mode = AES_MODE_CBC;
@@ -349,7 +349,7 @@
 	err = blkcipher_walk_virt(desc, &walk);
 	op->iv = walk.iv;
 
-	while((nbytes = walk.nbytes)) {
+	while ((nbytes = walk.nbytes)) {
 		op->src = walk.src.virt.addr,
 		op->dst = walk.dst.virt.addr;
 		op->mode = AES_MODE_CBC;
@@ -429,7 +429,7 @@
 	blkcipher_walk_init(&walk, dst, src, nbytes);
 	err = blkcipher_walk_virt(desc, &walk);
 
-	while((nbytes = walk.nbytes)) {
+	while ((nbytes = walk.nbytes)) {
 		op->src = walk.src.virt.addr,
 		op->dst = walk.dst.virt.addr;
 		op->mode = AES_MODE_ECB;
@@ -459,7 +459,7 @@
 	blkcipher_walk_init(&walk, dst, src, nbytes);
 	err = blkcipher_walk_virt(desc, &walk);
 
-	while((nbytes = walk.nbytes)) {
+	while ((nbytes = walk.nbytes)) {
 		op->src = walk.src.virt.addr,
 		op->dst = walk.dst.virt.addr;
 		op->mode = AES_MODE_ECB;
@@ -518,11 +518,12 @@
 geode_aes_probe(struct pci_dev *dev, const struct pci_device_id *id)
 {
 	int ret;
-
-	if ((ret = pci_enable_device(dev)))
+	ret = pci_enable_device(dev);
+	if (ret)
 		return ret;
 
-	if ((ret = pci_request_regions(dev, "geode-aes")))
+	ret = pci_request_regions(dev, "geode-aes");
+	if (ret)
 		goto eenable;
 
 	_iobase = pci_iomap(dev, 0, 0);
@@ -537,13 +538,16 @@
 	/* Clear any pending activity */
 	iowrite32(AES_INTR_PENDING | AES_INTR_MASK, _iobase + AES_INTR_REG);
 
-	if ((ret = crypto_register_alg(&geode_alg)))
+	ret = crypto_register_alg(&geode_alg);
+	if (ret)
 		goto eiomap;
 
-	if ((ret = crypto_register_alg(&geode_ecb_alg)))
+	ret = crypto_register_alg(&geode_ecb_alg);
+	if (ret)
 		goto ealg;
 
-	if ((ret = crypto_register_alg(&geode_cbc_alg)))
+	ret = crypto_register_alg(&geode_cbc_alg);
+	if (ret)
 		goto eecb;
 
 	printk(KERN_NOTICE "geode-aes: GEODE AES engine enabled.\n");
diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c
index 6f29012..18a436c 100644
--- a/drivers/crypto/mv_cesa.c
+++ b/drivers/crypto/mv_cesa.c
@@ -15,8 +15,14 @@
 #include <linux/platform_device.h>
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
+#include <crypto/internal/hash.h>
+#include <crypto/sha.h>
 
 #include "mv_cesa.h"
+
+#define MV_CESA	"MV-CESA:"
+#define MAX_HW_HASH_SIZE	0xFFFF
+
 /*
  * STM:
  *   /---------------------------------------\
@@ -39,10 +45,12 @@
  * @dst_sg_it:		sg iterator for dst
  * @sg_src_left:	bytes left in src to process (scatter list)
  * @src_start:		offset to add to src start position (scatter list)
- * @crypt_len:		length of current crypt process
+ * @crypt_len:		length of current hw crypt/hash process
+ * @hw_nbytes:		total bytes to process in hw for this request
+ * @copy_back:		whether to copy data back (crypt) or not (hash)
  * @sg_dst_left:	bytes left dst to process in this scatter list
  * @dst_start:		offset to add to dst start position (scatter list)
- * @total_req_bytes:	total number of bytes processed (request).
+ * @hw_processed_bytes:	number of bytes processed by hw (request).
  *
 * sg helpers are used to iterate over the scatterlist. Since the size of the
 * SRAM may be less than the scatter size, this struct is used to keep
@@ -51,15 +59,19 @@
 struct req_progress {
 	struct sg_mapping_iter src_sg_it;
 	struct sg_mapping_iter dst_sg_it;
+	void (*complete) (void);
+	void (*process) (int is_first);
 
 	/* src mostly */
 	int sg_src_left;
 	int src_start;
 	int crypt_len;
+	int hw_nbytes;
 	/* dst mostly */
+	int copy_back;
 	int sg_dst_left;
 	int dst_start;
-	int total_req_bytes;
+	int hw_processed_bytes;
 };
 
 struct crypto_priv {
@@ -72,10 +84,12 @@
 	spinlock_t lock;
 	struct crypto_queue queue;
 	enum engine_status eng_st;
-	struct ablkcipher_request *cur_req;
+	struct crypto_async_request *cur_req;
 	struct req_progress p;
 	int max_req_size;
 	int sram_size;
+	int has_sha1;
+	int has_hmac_sha1;
 };
 
 static struct crypto_priv *cpg;
@@ -97,6 +111,31 @@
 	int decrypt;
 };
 
+enum hash_op {
+	COP_SHA1,
+	COP_HMAC_SHA1
+};
+
+struct mv_tfm_hash_ctx {
+	struct crypto_shash *fallback;
+	struct crypto_shash *base_hash;
+	u32 ivs[2 * SHA1_DIGEST_SIZE / 4];
+	int count_add;
+	enum hash_op op;
+};
+
+struct mv_req_hash_ctx {
+	u64 count;
+	u32 state[SHA1_DIGEST_SIZE / 4];
+	u8 buffer[SHA1_BLOCK_SIZE];
+	int first_hash;		/* marks that we don't have previous state */
+	int last_chunk;		/* marks that this is the 'final' request */
+	int extra_bytes;	/* unprocessed bytes in buffer */
+	enum hash_op op;
+	int count_add;
+	struct scatterlist dummysg;
+};
+
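The new request context buffers sub-block tails between updates, since the
engine only consumes whole 64-byte SHA-1 blocks. A sketch of that accounting
(it mirrors the non-final path of mv_start_new_hash_req() below):

	#include <stdint.h>

	#define SHA1_BLOCK_SIZE	64

	struct partial_buf {
		uint8_t buffer[SHA1_BLOCK_SIZE];
		unsigned int extra_bytes;	/* valid bytes in buffer */
	};

	static unsigned int hw_len(struct partial_buf *p, unsigned int nbytes)
	{
		unsigned int total = p->extra_bytes + nbytes;

		p->extra_bytes = total % SHA1_BLOCK_SIZE; /* tail kept for later */
		return total - p->extra_bytes;		  /* whole blocks for hw */
	}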
 static void compute_aes_dec_key(struct mv_ctx *ctx)
 {
 	struct crypto_aes_ctx gen_aes_key;
@@ -144,32 +183,51 @@
 	return 0;
 }
 
-static void setup_data_in(struct ablkcipher_request *req)
+static void copy_src_to_buf(struct req_progress *p, char *dbuf, int len)
 {
 	int ret;
-	void *buf;
+	void *sbuf;
+	int copied = 0;
 
-	if (!cpg->p.sg_src_left) {
-		ret = sg_miter_next(&cpg->p.src_sg_it);
-		BUG_ON(!ret);
-		cpg->p.sg_src_left = cpg->p.src_sg_it.length;
-		cpg->p.src_start = 0;
+	while (1) {
+		if (!p->sg_src_left) {
+			ret = sg_miter_next(&p->src_sg_it);
+			BUG_ON(!ret);
+			p->sg_src_left = p->src_sg_it.length;
+			p->src_start = 0;
+		}
+
+		sbuf = p->src_sg_it.addr + p->src_start;
+
+		if (p->sg_src_left <= len - copied) {
+			memcpy(dbuf + copied, sbuf, p->sg_src_left);
+			copied += p->sg_src_left;
+			p->sg_src_left = 0;
+			if (copied >= len)
+				break;
+		} else {
+			int copy_len = len - copied;
+			memcpy(dbuf + copied, sbuf, copy_len);
+			p->src_start += copy_len;
+			p->sg_src_left -= copy_len;
+			break;
+		}
 	}
+}
 
-	cpg->p.crypt_len = min(cpg->p.sg_src_left, cpg->max_req_size);
-
-	buf = cpg->p.src_sg_it.addr;
-	buf += cpg->p.src_start;
-
-	memcpy(cpg->sram + SRAM_DATA_IN_START, buf, cpg->p.crypt_len);
-
-	cpg->p.sg_src_left -= cpg->p.crypt_len;
-	cpg->p.src_start += cpg->p.crypt_len;
+static void setup_data_in(void)
+{
+	struct req_progress *p = &cpg->p;
+	int data_in_sram =
+	    min(p->hw_nbytes - p->hw_processed_bytes, cpg->max_req_size);
+	copy_src_to_buf(p, cpg->sram + SRAM_DATA_IN_START + p->crypt_len,
+			data_in_sram - p->crypt_len);
+	p->crypt_len = data_in_sram;
 }
 
 static void mv_process_current_q(int first_block)
 {
-	struct ablkcipher_request *req = cpg->cur_req;
+	struct ablkcipher_request *req = ablkcipher_request_cast(cpg->cur_req);
 	struct mv_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
 	struct mv_req_ctx *req_ctx = ablkcipher_request_ctx(req);
 	struct sec_accel_config op;
@@ -179,6 +237,7 @@
 		op.config = CFG_OP_CRYPT_ONLY | CFG_ENCM_AES | CFG_ENC_MODE_ECB;
 		break;
 	case COP_AES_CBC:
+	default:
 		op.config = CFG_OP_CRYPT_ONLY | CFG_ENCM_AES | CFG_ENC_MODE_CBC;
 		op.enc_iv = ENC_IV_POINT(SRAM_DATA_IV) |
 			ENC_IV_BUF_POINT(SRAM_DATA_IV_BUF);
@@ -211,7 +270,7 @@
 		ENC_P_DST(SRAM_DATA_OUT_START);
 	op.enc_key_p = SRAM_DATA_KEY_P;
 
-	setup_data_in(req);
+	setup_data_in();
 	op.enc_len = cpg->p.crypt_len;
 	memcpy(cpg->sram + SRAM_CONFIG, &op,
 			sizeof(struct sec_accel_config));
@@ -228,91 +287,294 @@
 
 static void mv_crypto_algo_completion(void)
 {
-	struct ablkcipher_request *req = cpg->cur_req;
+	struct ablkcipher_request *req = ablkcipher_request_cast(cpg->cur_req);
 	struct mv_req_ctx *req_ctx = ablkcipher_request_ctx(req);
 
+	sg_miter_stop(&cpg->p.src_sg_it);
+	sg_miter_stop(&cpg->p.dst_sg_it);
+
 	if (req_ctx->op != COP_AES_CBC)
 		return ;
 
 	memcpy(req->info, cpg->sram + SRAM_DATA_IV_BUF, 16);
 }
 
+static void mv_process_hash_current(int first_block)
+{
+	struct ahash_request *req = ahash_request_cast(cpg->cur_req);
+	struct mv_req_hash_ctx *req_ctx = ahash_request_ctx(req);
+	struct req_progress *p = &cpg->p;
+	struct sec_accel_config op = { 0 };
+	int is_last;
+
+	switch (req_ctx->op) {
+	case COP_SHA1:
+	default:
+		op.config = CFG_OP_MAC_ONLY | CFG_MACM_SHA1;
+		break;
+	case COP_HMAC_SHA1:
+		op.config = CFG_OP_MAC_ONLY | CFG_MACM_HMAC_SHA1;
+		break;
+	}
+
+	op.mac_src_p = MAC_SRC_DATA_P(SRAM_DATA_IN_START) |
+		MAC_SRC_TOTAL_LEN((u32)req_ctx->count);
+
+	setup_data_in();
+
+	op.mac_digest =
+		MAC_DIGEST_P(SRAM_DIGEST_BUF) | MAC_FRAG_LEN(p->crypt_len);
+	op.mac_iv =
+		MAC_INNER_IV_P(SRAM_HMAC_IV_IN) |
+		MAC_OUTER_IV_P(SRAM_HMAC_IV_OUT);
+
+	is_last = req_ctx->last_chunk
+		&& (p->hw_processed_bytes + p->crypt_len >= p->hw_nbytes)
+		&& (req_ctx->count <= MAX_HW_HASH_SIZE);
+	if (req_ctx->first_hash) {
+		if (is_last)
+			op.config |= CFG_NOT_FRAG;
+		else
+			op.config |= CFG_FIRST_FRAG;
+
+		req_ctx->first_hash = 0;
+	} else {
+		if (is_last)
+			op.config |= CFG_LAST_FRAG;
+		else
+			op.config |= CFG_MID_FRAG;
+	}
+
+	memcpy(cpg->sram + SRAM_CONFIG, &op, sizeof(struct sec_accel_config));
+
+	writel(SRAM_CONFIG, cpg->reg + SEC_ACCEL_DESC_P0);
+	/* GO */
+	writel(SEC_CMD_EN_SEC_ACCL0, cpg->reg + SEC_ACCEL_CMD);
+
+	/*
+	 * XXX: add a timer in case the interrupt does not occur for some
+	 * mysterious reason
+	 */
+}
+
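The fragment bits chosen above form a small state machine: a hash that fits in
one request is "not fragmented"; otherwise the engine is told whether the
current chunk is the first, a middle, or the last fragment. Restated compactly
(using the CFG_* values this patch adds to mv_cesa.h):

	#define CFG_NOT_FRAG	(0U << 30)
	#define CFG_FIRST_FRAG	(1U << 30)
	#define CFG_LAST_FRAG	(2U << 30)
	#define CFG_MID_FRAG	(3U << 30)

	static unsigned int frag_cfg(int first_hash, int is_last)
	{
		if (first_hash)
			return is_last ? CFG_NOT_FRAG : CFG_FIRST_FRAG;
		return is_last ? CFG_LAST_FRAG : CFG_MID_FRAG;
	}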
+static inline int mv_hash_import_sha1_ctx(const struct mv_req_hash_ctx *ctx,
+					  struct shash_desc *desc)
+{
+	int i;
+	struct sha1_state shash_state;
+
+	shash_state.count = ctx->count + ctx->count_add;
+	for (i = 0; i < 5; i++)
+		shash_state.state[i] = ctx->state[i];
+	memcpy(shash_state.buffer, ctx->buffer, sizeof(shash_state.buffer));
+	return crypto_shash_import(desc, &shash_state);
+}
+
+static int mv_hash_final_fallback(struct ahash_request *req)
+{
+	const struct mv_tfm_hash_ctx *tfm_ctx = crypto_tfm_ctx(req->base.tfm);
+	struct mv_req_hash_ctx *req_ctx = ahash_request_ctx(req);
+	struct {
+		struct shash_desc shash;
+		char ctx[crypto_shash_descsize(tfm_ctx->fallback)];
+	} desc;
+	int rc;
+
+	desc.shash.tfm = tfm_ctx->fallback;
+	desc.shash.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+	if (unlikely(req_ctx->first_hash)) {
+		crypto_shash_init(&desc.shash);
+		crypto_shash_update(&desc.shash, req_ctx->buffer,
+				    req_ctx->extra_bytes);
+	} else {
+		/* only SHA1 for now... */
+		rc = mv_hash_import_sha1_ctx(req_ctx, &desc.shash);
+		if (rc)
+			goto out;
+	}
+	rc = crypto_shash_final(&desc.shash, req->result);
+out:
+	return rc;
+}
+
+static void mv_hash_algo_completion(void)
+{
+	struct ahash_request *req = ahash_request_cast(cpg->cur_req);
+	struct mv_req_hash_ctx *ctx = ahash_request_ctx(req);
+
+	if (ctx->extra_bytes)
+		copy_src_to_buf(&cpg->p, ctx->buffer, ctx->extra_bytes);
+	sg_miter_stop(&cpg->p.src_sg_it);
+
+	ctx->state[0] = readl(cpg->reg + DIGEST_INITIAL_VAL_A);
+	ctx->state[1] = readl(cpg->reg + DIGEST_INITIAL_VAL_B);
+	ctx->state[2] = readl(cpg->reg + DIGEST_INITIAL_VAL_C);
+	ctx->state[3] = readl(cpg->reg + DIGEST_INITIAL_VAL_D);
+	ctx->state[4] = readl(cpg->reg + DIGEST_INITIAL_VAL_E);
+
+	if (likely(ctx->last_chunk)) {
+		if (likely(ctx->count <= MAX_HW_HASH_SIZE)) {
+			memcpy(req->result, cpg->sram + SRAM_DIGEST_BUF,
+			       crypto_ahash_digestsize(crypto_ahash_reqtfm
+						       (req)));
+		} else
+			mv_hash_final_fallback(req);
+	}
+}
+
 static void dequeue_complete_req(void)
 {
-	struct ablkcipher_request *req = cpg->cur_req;
+	struct crypto_async_request *req = cpg->cur_req;
 	void *buf;
 	int ret;
+	cpg->p.hw_processed_bytes += cpg->p.crypt_len;
+	if (cpg->p.copy_back) {
+		int need_copy_len = cpg->p.crypt_len;
+		int sram_offset = 0;
+		do {
+			int dst_copy;
 
-	cpg->p.total_req_bytes += cpg->p.crypt_len;
-	do {
-		int dst_copy;
+			if (!cpg->p.sg_dst_left) {
+				ret = sg_miter_next(&cpg->p.dst_sg_it);
+				BUG_ON(!ret);
+				cpg->p.sg_dst_left = cpg->p.dst_sg_it.length;
+				cpg->p.dst_start = 0;
+			}
 
-		if (!cpg->p.sg_dst_left) {
-			ret = sg_miter_next(&cpg->p.dst_sg_it);
-			BUG_ON(!ret);
-			cpg->p.sg_dst_left = cpg->p.dst_sg_it.length;
-			cpg->p.dst_start = 0;
-		}
+			buf = cpg->p.dst_sg_it.addr;
+			buf += cpg->p.dst_start;
 
-		buf = cpg->p.dst_sg_it.addr;
-		buf += cpg->p.dst_start;
+			dst_copy = min(need_copy_len, cpg->p.sg_dst_left);
 
-		dst_copy = min(cpg->p.crypt_len, cpg->p.sg_dst_left);
+			memcpy(buf,
+			       cpg->sram + SRAM_DATA_OUT_START + sram_offset,
+			       dst_copy);
+			sram_offset += dst_copy;
+			cpg->p.sg_dst_left -= dst_copy;
+			need_copy_len -= dst_copy;
+			cpg->p.dst_start += dst_copy;
+		} while (need_copy_len > 0);
+	}
 
-		memcpy(buf, cpg->sram + SRAM_DATA_OUT_START, dst_copy);
-
-		cpg->p.sg_dst_left -= dst_copy;
-		cpg->p.crypt_len -= dst_copy;
-		cpg->p.dst_start += dst_copy;
-	} while (cpg->p.crypt_len > 0);
+	cpg->p.crypt_len = 0;
 
 	BUG_ON(cpg->eng_st != ENGINE_W_DEQUEUE);
-	if (cpg->p.total_req_bytes < req->nbytes) {
+	if (cpg->p.hw_processed_bytes < cpg->p.hw_nbytes) {
 		/* process next scatter list entry */
 		cpg->eng_st = ENGINE_BUSY;
-		mv_process_current_q(0);
+		cpg->p.process(0);
 	} else {
-		sg_miter_stop(&cpg->p.src_sg_it);
-		sg_miter_stop(&cpg->p.dst_sg_it);
-		mv_crypto_algo_completion();
+		cpg->p.complete();
 		cpg->eng_st = ENGINE_IDLE;
-		req->base.complete(&req->base, 0);
+		local_bh_disable();
+		req->complete(req, 0);
+		local_bh_enable();
 	}
 }
 
 static int count_sgs(struct scatterlist *sl, unsigned int total_bytes)
 {
 	int i = 0;
+	size_t cur_len;
 
-	do {
-		total_bytes -= sl[i].length;
-		i++;
-
-	} while (total_bytes > 0);
+	while (1) {
+		cur_len = sl[i].length;
+		++i;
+		if (total_bytes > cur_len)
+			total_bytes -= cur_len;
+		else
+			break;
+	}
 
 	return i;
 }
 
-static void mv_enqueue_new_req(struct ablkcipher_request *req)
+static void mv_start_new_crypt_req(struct ablkcipher_request *req)
 {
+	struct req_progress *p = &cpg->p;
 	int num_sgs;
 
-	cpg->cur_req = req;
-	memset(&cpg->p, 0, sizeof(struct req_progress));
+	cpg->cur_req = &req->base;
+	memset(p, 0, sizeof(struct req_progress));
+	p->hw_nbytes = req->nbytes;
+	p->complete = mv_crypto_algo_completion;
+	p->process = mv_process_current_q;
+	p->copy_back = 1;
 
 	num_sgs = count_sgs(req->src, req->nbytes);
-	sg_miter_start(&cpg->p.src_sg_it, req->src, num_sgs, SG_MITER_FROM_SG);
+	sg_miter_start(&p->src_sg_it, req->src, num_sgs, SG_MITER_FROM_SG);
 
 	num_sgs = count_sgs(req->dst, req->nbytes);
-	sg_miter_start(&cpg->p.dst_sg_it, req->dst, num_sgs, SG_MITER_TO_SG);
+	sg_miter_start(&p->dst_sg_it, req->dst, num_sgs, SG_MITER_TO_SG);
+
 	mv_process_current_q(1);
 }
 
+static void mv_start_new_hash_req(struct ahash_request *req)
+{
+	struct req_progress *p = &cpg->p;
+	struct mv_req_hash_ctx *ctx = ahash_request_ctx(req);
+	const struct mv_tfm_hash_ctx *tfm_ctx = crypto_tfm_ctx(req->base.tfm);
+	int num_sgs, hw_bytes, old_extra_bytes, rc;
+	cpg->cur_req = &req->base;
+	memset(p, 0, sizeof(struct req_progress));
+	hw_bytes = req->nbytes + ctx->extra_bytes;
+	old_extra_bytes = ctx->extra_bytes;
+
+	if (unlikely(ctx->extra_bytes)) {
+		memcpy(cpg->sram + SRAM_DATA_IN_START, ctx->buffer,
+		       ctx->extra_bytes);
+		p->crypt_len = ctx->extra_bytes;
+	}
+
+	memcpy(cpg->sram + SRAM_HMAC_IV_IN, tfm_ctx->ivs, sizeof(tfm_ctx->ivs));
+
+	if (unlikely(!ctx->first_hash)) {
+		writel(ctx->state[0], cpg->reg + DIGEST_INITIAL_VAL_A);
+		writel(ctx->state[1], cpg->reg + DIGEST_INITIAL_VAL_B);
+		writel(ctx->state[2], cpg->reg + DIGEST_INITIAL_VAL_C);
+		writel(ctx->state[3], cpg->reg + DIGEST_INITIAL_VAL_D);
+		writel(ctx->state[4], cpg->reg + DIGEST_INITIAL_VAL_E);
+	}
+
+	ctx->extra_bytes = hw_bytes % SHA1_BLOCK_SIZE;
+	if (ctx->extra_bytes != 0
+	    && (!ctx->last_chunk || ctx->count > MAX_HW_HASH_SIZE))
+		hw_bytes -= ctx->extra_bytes;
+	else
+		ctx->extra_bytes = 0;
+
+	num_sgs = count_sgs(req->src, req->nbytes);
+	sg_miter_start(&p->src_sg_it, req->src, num_sgs, SG_MITER_FROM_SG);
+
+	if (hw_bytes) {
+		p->hw_nbytes = hw_bytes;
+		p->complete = mv_hash_algo_completion;
+		p->process = mv_process_hash_current;
+
+		mv_process_hash_current(1);
+	} else {
+		copy_src_to_buf(p, ctx->buffer + old_extra_bytes,
+				ctx->extra_bytes - old_extra_bytes);
+		sg_miter_stop(&p->src_sg_it);
+		if (ctx->last_chunk)
+			rc = mv_hash_final_fallback(req);
+		else
+			rc = 0;
+		cpg->eng_st = ENGINE_IDLE;
+		local_bh_disable();
+		req->base.complete(&req->base, rc);
+		local_bh_enable();
+	}
+}
+
 static int queue_manag(void *data)
 {
 	cpg->eng_st = ENGINE_IDLE;
 	do {
-		struct ablkcipher_request *req;
 		struct crypto_async_request *async_req = NULL;
 		struct crypto_async_request *backlog;
 
@@ -338,9 +600,18 @@
 		}
 
 		if (async_req) {
-			req = container_of(async_req,
-					struct ablkcipher_request, base);
-			mv_enqueue_new_req(req);
+			if (async_req->tfm->__crt_alg->cra_type !=
+			    &crypto_ahash_type) {
+				struct ablkcipher_request *req =
+				    container_of(async_req,
+						 struct ablkcipher_request,
+						 base);
+				mv_start_new_crypt_req(req);
+			} else {
+				struct ahash_request *req =
+				    ahash_request_cast(async_req);
+				mv_start_new_hash_req(req);
+			}
 			async_req = NULL;
 		}
 
@@ -350,13 +621,13 @@
 	return 0;
 }
 
-static int mv_handle_req(struct ablkcipher_request *req)
+static int mv_handle_req(struct crypto_async_request *req)
 {
 	unsigned long flags;
 	int ret;
 
 	spin_lock_irqsave(&cpg->lock, flags);
-	ret = ablkcipher_enqueue_request(&cpg->queue, req);
+	ret = crypto_enqueue_request(&cpg->queue, req);
 	spin_unlock_irqrestore(&cpg->lock, flags);
 	wake_up_process(cpg->queue_th);
 	return ret;
@@ -369,7 +640,7 @@
 	req_ctx->op = COP_AES_ECB;
 	req_ctx->decrypt = 0;
 
-	return mv_handle_req(req);
+	return mv_handle_req(&req->base);
 }
 
 static int mv_dec_aes_ecb(struct ablkcipher_request *req)
@@ -381,7 +652,7 @@
 	req_ctx->decrypt = 1;
 
 	compute_aes_dec_key(ctx);
-	return mv_handle_req(req);
+	return mv_handle_req(&req->base);
 }
 
 static int mv_enc_aes_cbc(struct ablkcipher_request *req)
@@ -391,7 +662,7 @@
 	req_ctx->op = COP_AES_CBC;
 	req_ctx->decrypt = 0;
 
-	return mv_handle_req(req);
+	return mv_handle_req(&req->base);
 }
 
 static int mv_dec_aes_cbc(struct ablkcipher_request *req)
@@ -403,7 +674,7 @@
 	req_ctx->decrypt = 1;
 
 	compute_aes_dec_key(ctx);
-	return mv_handle_req(req);
+	return mv_handle_req(&req->base);
 }
 
 static int mv_cra_init(struct crypto_tfm *tfm)
@@ -412,6 +683,215 @@
 	return 0;
 }
 
+static void mv_init_hash_req_ctx(struct mv_req_hash_ctx *ctx, int op,
+				 int is_last, unsigned int req_len,
+				 int count_add)
+{
+	memset(ctx, 0, sizeof(*ctx));
+	ctx->op = op;
+	ctx->count = req_len;
+	ctx->first_hash = 1;
+	ctx->last_chunk = is_last;
+	ctx->count_add = count_add;
+}
+
+static void mv_update_hash_req_ctx(struct mv_req_hash_ctx *ctx, int is_last,
+				   unsigned req_len)
+{
+	ctx->last_chunk = is_last;
+	ctx->count += req_len;
+}
+
+static int mv_hash_init(struct ahash_request *req)
+{
+	const struct mv_tfm_hash_ctx *tfm_ctx = crypto_tfm_ctx(req->base.tfm);
+	mv_init_hash_req_ctx(ahash_request_ctx(req), tfm_ctx->op, 0, 0,
+			     tfm_ctx->count_add);
+	return 0;
+}
+
+static int mv_hash_update(struct ahash_request *req)
+{
+	if (!req->nbytes)
+		return 0;
+
+	mv_update_hash_req_ctx(ahash_request_ctx(req), 0, req->nbytes);
+	return mv_handle_req(&req->base);
+}
+
+static int mv_hash_final(struct ahash_request *req)
+{
+	struct mv_req_hash_ctx *ctx = ahash_request_ctx(req);
+	/* final() carries no data, but the walk code still expects a valid
+	 * scatterlist, so point the request at a 4-byte dummy buffer */
+	sg_init_one(&ctx->dummysg, ctx->buffer, 4);
+	ahash_request_set_crypt(req, &ctx->dummysg, req->result, 0);
+	mv_update_hash_req_ctx(ctx, 1, 0);
+	return mv_handle_req(&req->base);
+}
+
+static int mv_hash_finup(struct ahash_request *req)
+{
+	if (!req->nbytes)
+		return mv_hash_final(req);
+
+	mv_update_hash_req_ctx(ahash_request_ctx(req), 1, req->nbytes);
+	return mv_handle_req(&req->base);
+}
+
+static int mv_hash_digest(struct ahash_request *req)
+{
+	const struct mv_tfm_hash_ctx *tfm_ctx = crypto_tfm_ctx(req->base.tfm);
+	mv_init_hash_req_ctx(ahash_request_ctx(req), tfm_ctx->op, 1,
+			     req->nbytes, tfm_ctx->count_add);
+	return mv_handle_req(&req->base);
+}
+
+static void mv_hash_init_ivs(struct mv_tfm_hash_ctx *ctx, const void *istate,
+			     const void *ostate)
+{
+	const struct sha1_state *isha1_state = istate, *osha1_state = ostate;
+	int i;
+	for (i = 0; i < 5; i++) {
+		ctx->ivs[i] = cpu_to_be32(isha1_state->state[i]);
+		ctx->ivs[i + 5] = cpu_to_be32(osha1_state->state[i]);
+	}
+}
+
+static int mv_hash_setkey(struct crypto_ahash *tfm, const u8 * key,
+			  unsigned int keylen)
+{
+	int rc;
+	struct mv_tfm_hash_ctx *ctx = crypto_tfm_ctx(&tfm->base);
+	int bs, ds, ss;
+
+	if (!ctx->base_hash)
+		return 0;
+
+	rc = crypto_shash_setkey(ctx->fallback, key, keylen);
+	if (rc)
+		return rc;
+
+	/* Can't see a way to extract the ipad/opad from the fallback tfm,
+	 * so I'm basically copying code from the hmac module */
+	bs = crypto_shash_blocksize(ctx->base_hash);
+	ds = crypto_shash_digestsize(ctx->base_hash);
+	ss = crypto_shash_statesize(ctx->base_hash);
+
+	{
+		struct {
+			struct shash_desc shash;
+			char ctx[crypto_shash_descsize(ctx->base_hash)];
+		} desc;
+		unsigned int i;
+		char ipad[ss];
+		char opad[ss];
+
+		desc.shash.tfm = ctx->base_hash;
+		desc.shash.flags = crypto_shash_get_flags(ctx->base_hash) &
+		    CRYPTO_TFM_REQ_MAY_SLEEP;
+
+		if (keylen > bs) {
+			int err;
+
+			err =
+			    crypto_shash_digest(&desc.shash, key, keylen, ipad);
+			if (err)
+				return err;
+
+			keylen = ds;
+		} else
+			memcpy(ipad, key, keylen);
+
+		memset(ipad + keylen, 0, bs - keylen);
+		memcpy(opad, ipad, bs);
+
+		for (i = 0; i < bs; i++) {
+			ipad[i] ^= 0x36;
+			opad[i] ^= 0x5c;
+		}
+
+		rc = crypto_shash_init(&desc.shash) ? :
+		    crypto_shash_update(&desc.shash, ipad, bs) ? :
+		    crypto_shash_export(&desc.shash, ipad) ? :
+		    crypto_shash_init(&desc.shash) ? :
+		    crypto_shash_update(&desc.shash, opad, bs) ? :
+		    crypto_shash_export(&desc.shash, opad);
+
+		if (rc == 0)
+			mv_hash_init_ivs(ctx, ipad, opad);
+
+		return rc;
+	}
+}
+
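mv_hash_setkey() precomputes the HMAC inner/outer states by running one
compression round over each of the two padded keys. The pad construction
itself is standard; a standalone sketch (keys longer than the block size are
digested down first, as in the code above):

	#include <stdint.h>
	#include <string.h>

	#define HMAC_BS	64	/* SHA-1 block size */

	static void hmac_pads(const uint8_t *key, size_t klen,
			      uint8_t ipad[HMAC_BS], uint8_t opad[HMAC_BS])
	{
		size_t i;

		memset(ipad, 0, HMAC_BS);
		memcpy(ipad, key, klen);	/* klen <= HMAC_BS here */
		memcpy(opad, ipad, HMAC_BS);
		for (i = 0; i < HMAC_BS; i++) {
			ipad[i] ^= 0x36;
			opad[i] ^= 0x5c;
		}
	}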
+static int mv_cra_hash_init(struct crypto_tfm *tfm, const char *base_hash_name,
+			    enum hash_op op, int count_add)
+{
+	const char *fallback_driver_name = tfm->__crt_alg->cra_name;
+	struct mv_tfm_hash_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_shash *fallback_tfm = NULL;
+	struct crypto_shash *base_hash = NULL;
+	int err = -ENOMEM;
+
+	ctx->op = op;
+	ctx->count_add = count_add;
+
+	/* Allocate a fallback and abort if it failed. */
+	fallback_tfm = crypto_alloc_shash(fallback_driver_name, 0,
+					  CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(fallback_tfm)) {
+		printk(KERN_WARNING MV_CESA
+		       "Fallback driver '%s' could not be loaded!\n",
+		       fallback_driver_name);
+		err = PTR_ERR(fallback_tfm);
+		goto out;
+	}
+	ctx->fallback = fallback_tfm;
+
+	if (base_hash_name) {
+		/* Allocate a hash to compute the ipad/opad of hmac. */
+		base_hash = crypto_alloc_shash(base_hash_name, 0,
+					       CRYPTO_ALG_NEED_FALLBACK);
+		if (IS_ERR(base_hash)) {
+			printk(KERN_WARNING MV_CESA
+			       "Base driver '%s' could not be loaded!\n",
+			       base_hash_name);
+			err = PTR_ERR(base_hash);
+			goto err_bad_base;
+		}
+	}
+	ctx->base_hash = base_hash;
+
+	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+				 sizeof(struct mv_req_hash_ctx) +
+				 crypto_shash_descsize(ctx->fallback));
+	return 0;
+err_bad_base:
+	crypto_free_shash(fallback_tfm);
+out:
+	return err;
+}
+
+static void mv_cra_hash_exit(struct crypto_tfm *tfm)
+{
+	struct mv_tfm_hash_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	crypto_free_shash(ctx->fallback);
+	if (ctx->base_hash)
+		crypto_free_shash(ctx->base_hash);
+}
+
+static int mv_cra_hash_sha1_init(struct crypto_tfm *tfm)
+{
+	return mv_cra_hash_init(tfm, NULL, COP_SHA1, 0);
+}
+
+static int mv_cra_hash_hmac_sha1_init(struct crypto_tfm *tfm)
+{
+	return mv_cra_hash_init(tfm, "sha1", COP_HMAC_SHA1, SHA1_BLOCK_SIZE);
+}
+
 irqreturn_t crypto_int(int irq, void *priv)
 {
 	u32 val;
@@ -474,6 +954,53 @@
 	},
 };
 
+struct ahash_alg mv_sha1_alg = {
+	.init = mv_hash_init,
+	.update = mv_hash_update,
+	.final = mv_hash_final,
+	.finup = mv_hash_finup,
+	.digest = mv_hash_digest,
+	.halg = {
+		 .digestsize = SHA1_DIGEST_SIZE,
+		 .base = {
+			  .cra_name = "sha1",
+			  .cra_driver_name = "mv-sha1",
+			  .cra_priority = 300,
+			  .cra_flags =
+			  CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
+			  .cra_blocksize = SHA1_BLOCK_SIZE,
+			  .cra_ctxsize = sizeof(struct mv_tfm_hash_ctx),
+			  .cra_init = mv_cra_hash_sha1_init,
+			  .cra_exit = mv_cra_hash_exit,
+			  .cra_module = THIS_MODULE,
+			  }
+		 }
+};
+
+struct ahash_alg mv_hmac_sha1_alg = {
+	.init = mv_hash_init,
+	.update = mv_hash_update,
+	.final = mv_hash_final,
+	.finup = mv_hash_finup,
+	.digest = mv_hash_digest,
+	.setkey = mv_hash_setkey,
+	.halg = {
+		 .digestsize = SHA1_DIGEST_SIZE,
+		 .base = {
+			  .cra_name = "hmac(sha1)",
+			  .cra_driver_name = "mv-hmac-sha1",
+			  .cra_priority = 300,
+			  .cra_flags =
+			  CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
+			  .cra_blocksize = SHA1_BLOCK_SIZE,
+			  .cra_ctxsize = sizeof(struct mv_tfm_hash_ctx),
+			  .cra_init = mv_cra_hash_hmac_sha1_init,
+			  .cra_exit = mv_cra_hash_exit,
+			  .cra_module = THIS_MODULE,
+			  }
+		 }
+};
+
 static int mv_probe(struct platform_device *pdev)
 {
 	struct crypto_priv *cp;
@@ -482,7 +1009,7 @@
 	int ret;
 
 	if (cpg) {
-		printk(KERN_ERR "Second crypto dev?\n");
+		printk(KERN_ERR MV_CESA "Second crypto dev?\n");
 		return -EEXIST;
 	}
 
@@ -546,6 +1073,21 @@
 	ret = crypto_register_alg(&mv_aes_alg_cbc);
 	if (ret)
 		goto err_unreg_ecb;
+
+	ret = crypto_register_ahash(&mv_sha1_alg);
+	if (ret == 0)
+		cpg->has_sha1 = 1;
+	else
+		printk(KERN_WARNING MV_CESA "Could not register sha1 driver\n");
+
+	ret = crypto_register_ahash(&mv_hmac_sha1_alg);
+	if (ret == 0) {
+		cpg->has_hmac_sha1 = 1;
+	} else {
+		printk(KERN_WARNING MV_CESA
+		       "Could not register hmac-sha1 driver\n");
+	}
+
 	return 0;
 err_unreg_ecb:
 	crypto_unregister_alg(&mv_aes_alg_ecb);
@@ -570,6 +1112,10 @@
 
 	crypto_unregister_alg(&mv_aes_alg_ecb);
 	crypto_unregister_alg(&mv_aes_alg_cbc);
+	if (cp->has_sha1)
+		crypto_unregister_ahash(&mv_sha1_alg);
+	if (cp->has_hmac_sha1)
+		crypto_unregister_ahash(&mv_hmac_sha1_alg);
 	kthread_stop(cp->queue_th);
 	free_irq(cp->irq, cp);
 	memset(cp->sram, 0, cp->sram_size);
diff --git a/drivers/crypto/mv_cesa.h b/drivers/crypto/mv_cesa.h
index c3e25d3..08fcb11 100644
--- a/drivers/crypto/mv_cesa.h
+++ b/drivers/crypto/mv_cesa.h
@@ -1,6 +1,10 @@
 #ifndef __MV_CRYPTO_H__
 
 #define DIGEST_INITIAL_VAL_A	0xdd00
+#define DIGEST_INITIAL_VAL_B	0xdd04
+#define DIGEST_INITIAL_VAL_C	0xdd08
+#define DIGEST_INITIAL_VAL_D	0xdd0c
+#define DIGEST_INITIAL_VAL_E	0xdd10
 #define DES_CMD_REG		0xdd58
 
 #define SEC_ACCEL_CMD		0xde00
@@ -70,6 +74,10 @@
 #define CFG_AES_LEN_128		(0 << 24)
 #define CFG_AES_LEN_192		(1 << 24)
 #define CFG_AES_LEN_256		(2 << 24)
+#define CFG_NOT_FRAG		(0 << 30)
+#define CFG_FIRST_FRAG		(1 << 30)
+#define CFG_LAST_FRAG		(2 << 30)
+#define CFG_MID_FRAG		(3 << 30)
 
 	u32 enc_p;
 #define ENC_P_SRC(x)		(x)
@@ -90,7 +98,11 @@
 #define MAC_SRC_TOTAL_LEN(x)	((x) << 16)
 
 	u32 mac_digest;
+#define MAC_DIGEST_P(x)	(x)
+#define MAC_FRAG_LEN(x)	((x) << 16)
 	u32 mac_iv;
+#define MAC_INNER_IV_P(x)	(x)
+#define MAC_OUTER_IV_P(x)	((x) << 16)
 }__attribute__ ((packed));
 	/*
 	 * /-----------\ 0
@@ -101,19 +113,37 @@
 	 * |  IV   IN  |	4 * 4
 	 * |-----------| 0x40 (inplace)
 	 * |  IV BUF   |	4 * 4
-	 * |-----------| 0x50
+	 * |-----------| 0x80
 	 * |  DATA IN  |	16 * x (max ->max_req_size)
-	 * |-----------| 0x50 (inplace operation)
+	 * |-----------| 0x80 (inplace operation)
 	 * |  DATA OUT |	16 * x (max ->max_req_size)
 	 * \-----------/ SRAM size
 	 */
+
+	/* Hashing memory map:
+	 * /-----------\ 0
+	 * | ACCEL CFG |        4 * 8
+	 * |-----------| 0x20
+	 * | Inner IV  |        5 * 4
+	 * |-----------| 0x34
+	 * | Outer IV  |        5 * 4
+	 * |-----------| 0x48
+	 * | Output BUF|        5 * 4
+	 * |-----------| 0x80
+	 * |  DATA IN  |        64 * x (max ->max_req_size)
+	 * \-----------/ SRAM size
+	 */
 #define SRAM_CONFIG		0x00
 #define SRAM_DATA_KEY_P		0x20
 #define SRAM_DATA_IV		0x40
 #define SRAM_DATA_IV_BUF	0x40
-#define SRAM_DATA_IN_START	0x50
-#define SRAM_DATA_OUT_START	0x50
+#define SRAM_DATA_IN_START	0x80
+#define SRAM_DATA_OUT_START	0x80
 
-#define SRAM_CFG_SPACE		0x50
+#define SRAM_HMAC_IV_IN		0x20
+#define SRAM_HMAC_IV_OUT	0x34
+#define SRAM_DIGEST_BUF		0x48
+
+#define SRAM_CFG_SPACE		0x80
 
 #endif
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
new file mode 100644
index 0000000..8b03433
--- /dev/null
+++ b/drivers/crypto/omap-sham.c
@@ -0,0 +1,1259 @@
+/*
+ * Cryptographic API.
+ *
+ * Support for OMAP SHA1/MD5 HW acceleration.
+ *
+ * Copyright (c) 2010 Nokia Corporation
+ * Author: Dmitry Kasatkin <dmitry.kasatkin@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Some ideas are from old omap-sha1-md5.c driver.
+ */
+
+#define pr_fmt(fmt) "%s: " fmt, __func__
+
+#include <linux/version.h>
+#include <linux/err.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/clk.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-mapping.h>
+#include <linux/delay.h>
+#include <linux/crypto.h>
+#include <linux/cryptohash.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/algapi.h>
+#include <crypto/sha.h>
+#include <crypto/hash.h>
+#include <crypto/internal/hash.h>
+
+#include <plat/cpu.h>
+#include <plat/dma.h>
+#include <mach/irqs.h>
+
+#define SHA_REG_DIGEST(x)		(0x00 + ((x) * 0x04))
+#define SHA_REG_DIN(x)			(0x1C + ((x) * 0x04))
+
+#define SHA1_MD5_BLOCK_SIZE		SHA1_BLOCK_SIZE
+#define MD5_DIGEST_SIZE			16
+
+#define SHA_REG_DIGCNT			0x14
+
+#define SHA_REG_CTRL			0x18
+#define SHA_REG_CTRL_LENGTH		(0xFFFFFFFF << 5)
+#define SHA_REG_CTRL_CLOSE_HASH		(1 << 4)
+#define SHA_REG_CTRL_ALGO_CONST		(1 << 3)
+#define SHA_REG_CTRL_ALGO		(1 << 2)
+#define SHA_REG_CTRL_INPUT_READY	(1 << 1)
+#define SHA_REG_CTRL_OUTPUT_READY	(1 << 0)
+
+#define SHA_REG_REV			0x5C
+#define SHA_REG_REV_MAJOR		0xF0
+#define SHA_REG_REV_MINOR		0x0F
+
+#define SHA_REG_MASK			0x60
+#define SHA_REG_MASK_DMA_EN		(1 << 3)
+#define SHA_REG_MASK_IT_EN		(1 << 2)
+#define SHA_REG_MASK_SOFTRESET		(1 << 1)
+#define SHA_REG_AUTOIDLE		(1 << 0)
+
+#define SHA_REG_SYSSTATUS		0x64
+#define SHA_REG_SYSSTATUS_RESETDONE	(1 << 0)
+
+#define DEFAULT_TIMEOUT_INTERVAL	HZ
+
+#define FLAGS_FIRST		0x0001
+#define FLAGS_FINUP		0x0002
+#define FLAGS_FINAL		0x0004
+#define FLAGS_FAST		0x0008
+#define FLAGS_SHA1		0x0010
+#define FLAGS_DMA_ACTIVE	0x0020
+#define FLAGS_OUTPUT_READY	0x0040
+#define FLAGS_CLEAN		0x0080
+#define FLAGS_INIT		0x0100
+#define FLAGS_CPU		0x0200
+#define FLAGS_HMAC		0x0400
+
+/* bit number (3rd byte), not a mask: used with test_and_set_bit()/clear_bit() */
+#define FLAGS_BUSY		16
+
+#define OP_UPDATE	1
+#define OP_FINAL	2
+
+struct omap_sham_dev;
+
+struct omap_sham_reqctx {
+	struct omap_sham_dev	*dd;
+	unsigned long		flags;
+	unsigned long		op;
+
+	size_t			digcnt;
+	u8			*buffer;
+	size_t			bufcnt;
+	size_t			buflen;
+	dma_addr_t		dma_addr;
+
+	/* walk state */
+	struct scatterlist	*sg;
+	unsigned int		offset;	/* offset in current sg */
+	unsigned int		total;	/* total request */
+};
+
+struct omap_sham_hmac_ctx {
+	struct crypto_shash	*shash;
+	u8			ipad[SHA1_MD5_BLOCK_SIZE];
+	u8			opad[SHA1_MD5_BLOCK_SIZE];
+};
+
+struct omap_sham_ctx {
+	struct omap_sham_dev	*dd;
+
+	unsigned long		flags;
+
+	/* fallback stuff */
+	struct crypto_shash	*fallback;
+
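+	/* must be last; HMAC tfms reserve space for it via cra_ctxsize */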
+	struct omap_sham_hmac_ctx base[0];
+};
+
+#define OMAP_SHAM_QUEUE_LENGTH	1
+
+struct omap_sham_dev {
+	struct list_head	list;
+	unsigned long		phys_base;
+	struct device		*dev;
+	void __iomem		*io_base;
+	int			irq;
+	struct clk		*iclk;
+	spinlock_t		lock;
+	int			dma;
+	int			dma_lch;
+	struct tasklet_struct	done_task;
+	struct tasklet_struct	queue_task;
+
+	unsigned long		flags;
+	struct crypto_queue	queue;
+	struct ahash_request	*req;
+};
+
+struct omap_sham_drv {
+	struct list_head	dev_list;
+	spinlock_t		lock;
+	unsigned long		flags;
+};
+
+static struct omap_sham_drv sham = {
+	.dev_list = LIST_HEAD_INIT(sham.dev_list),
+	.lock = __SPIN_LOCK_UNLOCKED(sham.lock),
+};
+
+static inline u32 omap_sham_read(struct omap_sham_dev *dd, u32 offset)
+{
+	return __raw_readl(dd->io_base + offset);
+}
+
+static inline void omap_sham_write(struct omap_sham_dev *dd,
+					u32 offset, u32 value)
+{
+	__raw_writel(value, dd->io_base + offset);
+}
+
+static inline void omap_sham_write_mask(struct omap_sham_dev *dd, u32 address,
+					u32 value, u32 mask)
+{
+	u32 val;
+
+	val = omap_sham_read(dd, address);
+	val &= ~mask;
+	val |= value;
+	omap_sham_write(dd, address, val);
+}
+
+static inline int omap_sham_wait(struct omap_sham_dev *dd, u32 offset, u32 bit)
+{
+	unsigned long timeout = jiffies + DEFAULT_TIMEOUT_INTERVAL;
+
+	while (!(omap_sham_read(dd, offset) & bit)) {
+		if (time_is_before_jiffies(timeout))
+			return -ETIMEDOUT;
+		cpu_relax();
+	}
+
+	return 0;
+}
+
+static void omap_sham_copy_hash(struct ahash_request *req, int out)
+{
+	struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
+	u32 *hash = (u32 *)req->result;
+	int i;
+
+	if (likely(ctx->flags & FLAGS_SHA1)) {
+		/* SHA1 results are in big endian */
+		for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(u32); i++)
+			if (out)
+				hash[i] = be32_to_cpu(omap_sham_read(ctx->dd,
+							SHA_REG_DIGEST(i)));
+			else
+				omap_sham_write(ctx->dd, SHA_REG_DIGEST(i),
+							cpu_to_be32(hash[i]));
+	} else {
+		/* MD5 results are in little endian */
+		for (i = 0; i < MD5_DIGEST_SIZE / sizeof(u32); i++)
+			if (out)
+				hash[i] = le32_to_cpu(omap_sham_read(ctx->dd,
+							SHA_REG_DIGEST(i)));
+			else
+				omap_sham_write(ctx->dd, SHA_REG_DIGEST(i),
+							cpu_to_le32(hash[i]));
+	}
+}
+
+static int omap_sham_write_ctrl(struct omap_sham_dev *dd, size_t length,
+				 int final, int dma)
+{
+	struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
+	u32 val = length << 5, mask;
+
+	if (unlikely(!ctx->digcnt)) {
+
+		clk_enable(dd->iclk);
+
+		if (!(dd->flags & FLAGS_INIT)) {
+			omap_sham_write_mask(dd, SHA_REG_MASK,
+				SHA_REG_MASK_SOFTRESET, SHA_REG_MASK_SOFTRESET);
+
+			if (omap_sham_wait(dd, SHA_REG_SYSSTATUS,
+						SHA_REG_SYSSTATUS_RESETDONE))
+				return -ETIMEDOUT;
+
+			dd->flags |= FLAGS_INIT;
+		}
+	} else {
+		omap_sham_write(dd, SHA_REG_DIGCNT, ctx->digcnt);
+	}
+
+	omap_sham_write_mask(dd, SHA_REG_MASK,
+		SHA_REG_MASK_IT_EN | (dma ? SHA_REG_MASK_DMA_EN : 0),
+		SHA_REG_MASK_IT_EN | SHA_REG_MASK_DMA_EN);
+	/*
+	 * Setting ALGO_CONST only for the first iteration
+	 * and CLOSE_HASH only for the last one.
+	 */
+	if (ctx->flags & FLAGS_SHA1)
+		val |= SHA_REG_CTRL_ALGO;
+	if (!ctx->digcnt)
+		val |= SHA_REG_CTRL_ALGO_CONST;
+	if (final)
+		val |= SHA_REG_CTRL_CLOSE_HASH;
+
+	mask = SHA_REG_CTRL_ALGO_CONST | SHA_REG_CTRL_CLOSE_HASH |
+			SHA_REG_CTRL_ALGO | SHA_REG_CTRL_LENGTH;
+
+	omap_sham_write_mask(dd, SHA_REG_CTRL, val, mask);
+
+	return 0;
+}
+
+static int omap_sham_xmit_cpu(struct omap_sham_dev *dd, const u8 *buf,
+			      size_t length, int final)
+{
+	struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
+	int err, count, len32;
+	const u32 *buffer = (const u32 *)buf;
+
+	dev_dbg(dd->dev, "xmit_cpu: digcnt: %d, length: %d, final: %d\n",
+						ctx->digcnt, length, final);
+
+	err = omap_sham_write_ctrl(dd, length, final, 0);
+	if (err)
+		return err;
+
+	if (omap_sham_wait(dd, SHA_REG_CTRL, SHA_REG_CTRL_INPUT_READY))
+		return -ETIMEDOUT;
+
+	ctx->digcnt += length;
+
+	if (final)
+		ctx->flags |= FLAGS_FINAL; /* catch last interrupt */
+
+	len32 = DIV_ROUND_UP(length, sizeof(u32));
+
+	for (count = 0; count < len32; count++)
+		omap_sham_write(dd, SHA_REG_DIN(count), buffer[count]);
+
+	return -EINPROGRESS;
+}
+
+static int omap_sham_xmit_dma(struct omap_sham_dev *dd, dma_addr_t dma_addr,
+			      size_t length, int final)
+{
+	struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
+	int err, len32;
+
+	dev_dbg(dd->dev, "xmit_dma: digcnt: %d, length: %d, final: %d\n",
+						ctx->digcnt, length, final);
+
+	/* flush cache entries related to our page */
+	if (dma_addr == ctx->dma_addr)
+		dma_sync_single_for_device(dd->dev, dma_addr, length,
+					   DMA_TO_DEVICE);
+
+	len32 = DIV_ROUND_UP(length, sizeof(u32));
+
+	omap_set_dma_transfer_params(dd->dma_lch, OMAP_DMA_DATA_TYPE_S32, len32,
+			1, OMAP_DMA_SYNC_PACKET, dd->dma, OMAP_DMA_DST_SYNC);
+
+	omap_set_dma_src_params(dd->dma_lch, 0, OMAP_DMA_AMODE_POST_INC,
+				dma_addr, 0, 0);
+
+	err = omap_sham_write_ctrl(dd, length, final, 1);
+	if (err)
+		return err;
+
+	ctx->digcnt += length;
+
+	if (final)
+		ctx->flags |= FLAGS_FINAL; /* catch last interrupt */
+
+	dd->flags |= FLAGS_DMA_ACTIVE;
+
+	omap_start_dma(dd->dma_lch);
+
+	return -EINPROGRESS;
+}
+
+static size_t omap_sham_append_buffer(struct omap_sham_reqctx *ctx,
+				const u8 *data, size_t length)
+{
+	size_t count = min(length, ctx->buflen - ctx->bufcnt);
+
+	count = min(count, ctx->total);
+	if (!count)
+		return 0;
+	memcpy(ctx->buffer + ctx->bufcnt, data, count);
+	ctx->bufcnt += count;
+
+	return count;
+}
+
+static size_t omap_sham_append_sg(struct omap_sham_reqctx *ctx)
+{
+	size_t count;
+
+	while (ctx->sg) {
+		count = omap_sham_append_buffer(ctx,
+				sg_virt(ctx->sg) + ctx->offset,
+				ctx->sg->length - ctx->offset);
+		if (!count)
+			break;
+		ctx->offset += count;
+		ctx->total -= count;
+		if (ctx->offset == ctx->sg->length) {
+			ctx->sg = sg_next(ctx->sg);
+			if (ctx->sg)
+				ctx->offset = 0;
+			else
+				ctx->total = 0;
+		}
+	}
+
+	return 0;
+}
+
+static int omap_sham_update_dma_slow(struct omap_sham_dev *dd)
+{
+	struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
+	unsigned int final;
+	size_t count;
+
+	if (!ctx->total)
+		return 0;
+
+	omap_sham_append_sg(ctx);
+
+	final = (ctx->flags & FLAGS_FINUP) && !ctx->total;
+
+	dev_dbg(dd->dev, "slow: bufcnt: %u, digcnt: %d, final: %d\n",
+					 ctx->bufcnt, ctx->digcnt, final);
+
+	if (final || (ctx->bufcnt == ctx->buflen && ctx->total)) {
+		count = ctx->bufcnt;
+		ctx->bufcnt = 0;
+		return omap_sham_xmit_dma(dd, ctx->dma_addr, count, final);
+	}
+
+	return 0;
+}
+
+static int omap_sham_update_dma_fast(struct omap_sham_dev *dd)
+{
+	struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
+	unsigned int length;
+
+	ctx->flags |= FLAGS_FAST;
+
+	length = min(ctx->total, sg_dma_len(ctx->sg));
+	ctx->total = length;
+
+	if (!dma_map_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE)) {
+		dev_err(dd->dev, "dma_map_sg  error\n");
+		return -EINVAL;
+	}
+
+	ctx->total -= length;
+
+	return omap_sham_xmit_dma(dd, sg_dma_address(ctx->sg), length, 1);
+}
+
+static int omap_sham_update_cpu(struct omap_sham_dev *dd)
+{
+	struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
+	int bufcnt;
+
+	omap_sham_append_sg(ctx);
+	bufcnt = ctx->bufcnt;
+	ctx->bufcnt = 0;
+
+	return omap_sham_xmit_cpu(dd, ctx->buffer, bufcnt, 1);
+}
+
+static int omap_sham_update_dma_stop(struct omap_sham_dev *dd)
+{
+	struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
+
+	omap_stop_dma(dd->dma_lch);
+	if (ctx->flags & FLAGS_FAST)
+		dma_unmap_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE);
+
+	return 0;
+}
+
+static void omap_sham_cleanup(struct ahash_request *req)
+{
+	struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
+	struct omap_sham_dev *dd = ctx->dd;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dd->lock, flags);
+	if (ctx->flags & FLAGS_CLEAN) {
+		spin_unlock_irqrestore(&dd->lock, flags);
+		return;
+	}
+	ctx->flags |= FLAGS_CLEAN;
+	spin_unlock_irqrestore(&dd->lock, flags);
+
+	if (ctx->digcnt)
+		clk_disable(dd->iclk);
+
+	if (ctx->dma_addr)
+		dma_unmap_single(dd->dev, ctx->dma_addr, ctx->buflen,
+				 DMA_TO_DEVICE);
+
+	if (ctx->buffer)
+		free_page((unsigned long)ctx->buffer);
+
+	dev_dbg(dd->dev, "digcnt: %d, bufcnt: %d\n", ctx->digcnt, ctx->bufcnt);
+}
+
+static int omap_sham_init(struct ahash_request *req)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct omap_sham_ctx *tctx = crypto_ahash_ctx(tfm);
+	struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
+	struct omap_sham_dev *dd = NULL, *tmp;
+
+	spin_lock_bh(&sham.lock);
+	if (!tctx->dd) {
+		list_for_each_entry(tmp, &sham.dev_list, list) {
+			dd = tmp;
+			break;
+		}
+		tctx->dd = dd;
+	} else {
+		dd = tctx->dd;
+	}
+	spin_unlock_bh(&sham.lock);
+
+	ctx->dd = dd;
+
+	ctx->flags = 0;
+
+	ctx->flags |= FLAGS_FIRST;
+
+	dev_dbg(dd->dev, "init: digest size: %d\n",
+		crypto_ahash_digestsize(tfm));
+
+	if (crypto_ahash_digestsize(tfm) == SHA1_DIGEST_SIZE)
+		ctx->flags |= FLAGS_SHA1;
+
+	ctx->bufcnt = 0;
+	ctx->digcnt = 0;
+
+	ctx->buflen = PAGE_SIZE;
+	ctx->buffer = (void *)__get_free_page(
+				(req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+				GFP_KERNEL : GFP_ATOMIC);
+	if (!ctx->buffer)
+		return -ENOMEM;
+
+	ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer, ctx->buflen,
+					DMA_TO_DEVICE);
+	if (dma_mapping_error(dd->dev, ctx->dma_addr)) {
+		dev_err(dd->dev, "dma %u bytes error\n", ctx->buflen);
+		free_page((unsigned long)ctx->buffer);
+		return -EINVAL;
+	}
+
+	if (tctx->flags & FLAGS_HMAC) {
+		struct omap_sham_hmac_ctx *bctx = tctx->base;
+
+		memcpy(ctx->buffer, bctx->ipad, SHA1_MD5_BLOCK_SIZE);
+		ctx->bufcnt = SHA1_MD5_BLOCK_SIZE;
+		ctx->flags |= FLAGS_HMAC;
+	}
+
+	return 0;
+}
+
+static int omap_sham_update_req(struct omap_sham_dev *dd)
+{
+	struct ahash_request *req = dd->req;
+	struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
+	int err;
+
+	dev_dbg(dd->dev, "update_req: total: %u, digcnt: %d, finup: %d\n",
+		 ctx->total, ctx->digcnt, (ctx->flags & FLAGS_FINUP) != 0);
+
+	if (ctx->flags & FLAGS_CPU)
+		err = omap_sham_update_cpu(dd);
+	else if (ctx->flags & FLAGS_FAST)
+		err = omap_sham_update_dma_fast(dd);
+	else
+		err = omap_sham_update_dma_slow(dd);
+
+	/* wait for dma completion before can take more data */
+	dev_dbg(dd->dev, "update: err: %d, digcnt: %d\n", err, ctx->digcnt);
+
+	return err;
+}
+
+static int omap_sham_final_req(struct omap_sham_dev *dd)
+{
+	struct ahash_request *req = dd->req;
+	struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
+	int err = 0, use_dma = 1;
+
+	if (ctx->bufcnt <= 64)
+		/* faster to handle last block with cpu */
+		use_dma = 0;
+
+	if (use_dma)
+		err = omap_sham_xmit_dma(dd, ctx->dma_addr, ctx->bufcnt, 1);
+	else
+		err = omap_sham_xmit_cpu(dd, ctx->buffer, ctx->bufcnt, 1);
+
+	ctx->bufcnt = 0;
+
+	if (err != -EINPROGRESS)
+		omap_sham_cleanup(req);
+
+	dev_dbg(dd->dev, "final_req: err: %d\n", err);
+
+	return err;
+}
+
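+/*
+ * The hardware produced the inner hash H(ipad || message) in req->result
+ * (the ipad block was prepended to the data stream in omap_sham_init());
+ * complete the HMAC by computing the outer hash H(opad || inner hash)
+ * with the base shash transform.
+ */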
+static int omap_sham_finish_req_hmac(struct ahash_request *req)
+{
+	struct omap_sham_ctx *tctx = crypto_tfm_ctx(req->base.tfm);
+	struct omap_sham_hmac_ctx *bctx = tctx->base;
+	int bs = crypto_shash_blocksize(bctx->shash);
+	int ds = crypto_shash_digestsize(bctx->shash);
+	struct {
+		struct shash_desc shash;
+		char ctx[crypto_shash_descsize(bctx->shash)];
+	} desc;
+
+	desc.shash.tfm = bctx->shash;
+	desc.shash.flags = 0; /* not CRYPTO_TFM_REQ_MAY_SLEEP */
+
+	return crypto_shash_init(&desc.shash) ?:
+	       crypto_shash_update(&desc.shash, bctx->opad, bs) ?:
+	       crypto_shash_finup(&desc.shash, req->result, ds, req->result);
+}
+
+static void omap_sham_finish_req(struct ahash_request *req, int err)
+{
+	struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
+
+	if (!err) {
+		omap_sham_copy_hash(ctx->dd->req, 1);
+		if (ctx->flags & FLAGS_HMAC)
+			err = omap_sham_finish_req_hmac(req);
+	}
+
+	if (ctx->flags & FLAGS_FINAL)
+		omap_sham_cleanup(req);
+
+	clear_bit(FLAGS_BUSY, &ctx->dd->flags);
+
+	if (req->base.complete)
+		req->base.complete(&req->base, err);
+}
+
+static int omap_sham_handle_queue(struct omap_sham_dev *dd)
+{
+	struct crypto_async_request *async_req, *backlog;
+	struct omap_sham_reqctx *ctx;
+	struct ahash_request *req, *prev_req;
+	unsigned long flags;
+	int err = 0;
+
+	if (test_and_set_bit(FLAGS_BUSY, &dd->flags))
+		return 0;
+
+	spin_lock_irqsave(&dd->lock, flags);
+	backlog = crypto_get_backlog(&dd->queue);
+	async_req = crypto_dequeue_request(&dd->queue);
+	if (!async_req)
+		clear_bit(FLAGS_BUSY, &dd->flags);
+	spin_unlock_irqrestore(&dd->lock, flags);
+
+	if (!async_req)
+		return 0;
+
+	if (backlog)
+		backlog->complete(backlog, -EINPROGRESS);
+
+	req = ahash_request_cast(async_req);
+
+	prev_req = dd->req;
+	dd->req = req;
+
+	ctx = ahash_request_ctx(req);
+
+	dev_dbg(dd->dev, "handling new req, op: %lu, nbytes: %d\n",
+						ctx->op, req->nbytes);
+
+	if (req != prev_req && ctx->digcnt)
+		/* request has changed - restore hash */
+		omap_sham_copy_hash(req, 0);
+
+	if (ctx->op == OP_UPDATE) {
+		err = omap_sham_update_req(dd);
+		if (err != -EINPROGRESS && (ctx->flags & FLAGS_FINUP))
+			/* no final() after finup() */
+			err = omap_sham_final_req(dd);
+	} else if (ctx->op == OP_FINAL) {
+		err = omap_sham_final_req(dd);
+	}
+
+	if (err != -EINPROGRESS) {
+		/* done_task will not finish it, so do it here */
+		omap_sham_finish_req(req, err);
+		tasklet_schedule(&dd->queue_task);
+	}
+
+	dev_dbg(dd->dev, "exit, err: %d\n", err);
+
+	return err;
+}
+
+static int omap_sham_enqueue(struct ahash_request *req, unsigned int op)
+{
+	struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
+	struct omap_sham_ctx *tctx = crypto_tfm_ctx(req->base.tfm);
+	struct omap_sham_dev *dd = tctx->dd;
+	unsigned long flags;
+	int err;
+
+	ctx->op = op;
+
+	spin_lock_irqsave(&dd->lock, flags);
+	err = ahash_enqueue_request(&dd->queue, req);
+	spin_unlock_irqrestore(&dd->lock, flags);
+
+	omap_sham_handle_queue(dd);
+
+	return err;
+}
+
+static int omap_sham_update(struct ahash_request *req)
+{
+	struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
+
+	if (!req->nbytes)
+		return 0;
+
+	ctx->total = req->nbytes;
+	ctx->sg = req->src;
+	ctx->offset = 0;
+
+	if (ctx->flags & FLAGS_FINUP) {
+		if ((ctx->digcnt + ctx->bufcnt + ctx->total) < 9) {
+			/*
+			 * OMAP HW accel works only with buffers >= 9;
+			 * will switch to bypass in final().
+			 * final() has the same request and data.
+			 */
+			omap_sham_append_sg(ctx);
+			return 0;
+		} else if (ctx->bufcnt + ctx->total <= 64) {
+			ctx->flags |= FLAGS_CPU;
+		} else if (!ctx->bufcnt && sg_is_last(ctx->sg)) {
+			/* maybe we can use faster functions */
+			int aligned = IS_ALIGNED((u32)ctx->sg->offset,
+								sizeof(u32));
+
+			if (aligned && (ctx->flags & FLAGS_FIRST))
+				/* digest: first and final */
+				ctx->flags |= FLAGS_FAST;
+
+			ctx->flags &= ~FLAGS_FIRST;
+		}
+	} else if (ctx->bufcnt + ctx->total <= ctx->buflen) {
+		/* if not finup -> not fast */
+		omap_sham_append_sg(ctx);
+		return 0;
+	}
+
+	return omap_sham_enqueue(req, OP_UPDATE);
+}
+
+static int omap_sham_shash_digest(struct crypto_shash *shash, u32 flags,
+				  const u8 *data, unsigned int len, u8 *out)
+{
+	struct {
+		struct shash_desc shash;
+		char ctx[crypto_shash_descsize(shash)];
+	} desc;
+
+	desc.shash.tfm = shash;
+	desc.shash.flags = flags & CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	return crypto_shash_digest(&desc.shash, data, len, out);
+}
+
+static int omap_sham_final_shash(struct ahash_request *req)
+{
+	struct omap_sham_ctx *tctx = crypto_tfm_ctx(req->base.tfm);
+	struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
+
+	return omap_sham_shash_digest(tctx->fallback, req->base.flags,
+				      ctx->buffer, ctx->bufcnt, req->result);
+}
+
+static int omap_sham_final(struct ahash_request *req)
+{
+	struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
+	int err = 0;
+
+	ctx->flags |= FLAGS_FINUP;
+
+	/*
+	 * OMAP HW accel works only with buffers >= 9.
+	 * HMAC is always >= 9 because of the prepended ipad.
+	 */
+	if ((ctx->digcnt + ctx->bufcnt) < 9)
+		err = omap_sham_final_shash(req);
+	else if (ctx->bufcnt)
+		return omap_sham_enqueue(req, OP_FINAL);
+
+	omap_sham_cleanup(req);
+
+	return err;
+}
+
+static int omap_sham_finup(struct ahash_request *req)
+{
+	struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
+	int err1, err2;
+
+	ctx->flags |= FLAGS_FINUP;
+
+	err1 = omap_sham_update(req);
+	if (err1 == -EINPROGRESS)
+		return err1;
+	/*
+	 * final() has to be always called to cleanup resources
+	 * even if update() failed, except for -EINPROGRESS
+	 */
+	err2 = omap_sham_final(req);
+
+	return err1 ?: err2;
+}
+
+static int omap_sham_digest(struct ahash_request *req)
+{
+	return omap_sham_init(req) ?: omap_sham_finup(req);
+}
+
+static int omap_sham_setkey(struct crypto_ahash *tfm, const u8 *key,
+		      unsigned int keylen)
+{
+	struct omap_sham_ctx *tctx = crypto_ahash_ctx(tfm);
+	struct omap_sham_hmac_ctx *bctx = tctx->base;
+	int bs = crypto_shash_blocksize(bctx->shash);
+	int ds = crypto_shash_digestsize(bctx->shash);
+	int err, i;
+
+	err = crypto_shash_setkey(tctx->fallback, key, keylen);
+	if (err)
+		return err;
+
+	if (keylen > bs) {
+		err = omap_sham_shash_digest(bctx->shash,
+				crypto_shash_get_flags(bctx->shash),
+				key, keylen, bctx->ipad);
+		if (err)
+			return err;
+		keylen = ds;
+	} else {
+		memcpy(bctx->ipad, key, keylen);
+	}
+
+	memset(bctx->ipad + keylen, 0, bs - keylen);
+	memcpy(bctx->opad, bctx->ipad, bs);
+
+	for (i = 0; i < bs; i++) {
+		bctx->ipad[i] ^= 0x36;
+		bctx->opad[i] ^= 0x5c;
+	}
+
+	return err;
+}
+
+static int omap_sham_cra_init_alg(struct crypto_tfm *tfm, const char *alg_base)
+{
+	struct omap_sham_ctx *tctx = crypto_tfm_ctx(tfm);
+	const char *alg_name = crypto_tfm_alg_name(tfm);
+
+	/* Allocate a fallback and abort if it failed. */
+	tctx->fallback = crypto_alloc_shash(alg_name, 0,
+					    CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(tctx->fallback)) {
+		pr_err("omap-sham: fallback driver '%s' "
+				"could not be loaded.\n", alg_name);
+		return PTR_ERR(tctx->fallback);
+	}
+
+	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+				 sizeof(struct omap_sham_reqctx));
+
+	if (alg_base) {
+		struct omap_sham_hmac_ctx *bctx = tctx->base;
+		tctx->flags |= FLAGS_HMAC;
+		bctx->shash = crypto_alloc_shash(alg_base, 0,
+						CRYPTO_ALG_NEED_FALLBACK);
+		if (IS_ERR(bctx->shash)) {
+			pr_err("omap-sham: base driver '%s' "
+					"could not be loaded.\n", alg_base);
+			crypto_free_shash(tctx->fallback);
+			return PTR_ERR(bctx->shash);
+		}
+
+	}
+
+	return 0;
+}
+
+static int omap_sham_cra_init(struct crypto_tfm *tfm)
+{
+	return omap_sham_cra_init_alg(tfm, NULL);
+}
+
+static int omap_sham_cra_sha1_init(struct crypto_tfm *tfm)
+{
+	return omap_sham_cra_init_alg(tfm, "sha1");
+}
+
+static int omap_sham_cra_md5_init(struct crypto_tfm *tfm)
+{
+	return omap_sham_cra_init_alg(tfm, "md5");
+}
+
+static void omap_sham_cra_exit(struct crypto_tfm *tfm)
+{
+	struct omap_sham_ctx *tctx = crypto_tfm_ctx(tfm);
+
+	crypto_free_shash(tctx->fallback);
+	tctx->fallback = NULL;
+
+	if (tctx->flags & FLAGS_HMAC) {
+		struct omap_sham_hmac_ctx *bctx = tctx->base;
+		crypto_free_shash(bctx->shash);
+	}
+}
+
+static struct ahash_alg algs[] = {
+{
+	.init		= omap_sham_init,
+	.update		= omap_sham_update,
+	.final		= omap_sham_final,
+	.finup		= omap_sham_finup,
+	.digest		= omap_sham_digest,
+	.halg.digestsize	= SHA1_DIGEST_SIZE,
+	.halg.base	= {
+		.cra_name		= "sha1",
+		.cra_driver_name	= "omap-sha1",
+		.cra_priority		= 100,
+		.cra_flags		= CRYPTO_ALG_TYPE_AHASH |
+						CRYPTO_ALG_ASYNC |
+						CRYPTO_ALG_NEED_FALLBACK,
+		.cra_blocksize		= SHA1_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct omap_sham_ctx),
+		.cra_alignmask		= 0,
+		.cra_module		= THIS_MODULE,
+		.cra_init		= omap_sham_cra_init,
+		.cra_exit		= omap_sham_cra_exit,
+	}
+},
+{
+	.init		= omap_sham_init,
+	.update		= omap_sham_update,
+	.final		= omap_sham_final,
+	.finup		= omap_sham_finup,
+	.digest		= omap_sham_digest,
+	.halg.digestsize	= MD5_DIGEST_SIZE,
+	.halg.base	= {
+		.cra_name		= "md5",
+		.cra_driver_name	= "omap-md5",
+		.cra_priority		= 100,
+		.cra_flags		= CRYPTO_ALG_TYPE_AHASH |
+						CRYPTO_ALG_ASYNC |
+						CRYPTO_ALG_NEED_FALLBACK,
+		.cra_blocksize		= SHA1_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct omap_sham_ctx),
+		.cra_alignmask		= 0,
+		.cra_module		= THIS_MODULE,
+		.cra_init		= omap_sham_cra_init,
+		.cra_exit		= omap_sham_cra_exit,
+	}
+},
+{
+	.init		= omap_sham_init,
+	.update		= omap_sham_update,
+	.final		= omap_sham_final,
+	.finup		= omap_sham_finup,
+	.digest		= omap_sham_digest,
+	.setkey		= omap_sham_setkey,
+	.halg.digestsize	= SHA1_DIGEST_SIZE,
+	.halg.base	= {
+		.cra_name		= "hmac(sha1)",
+		.cra_driver_name	= "omap-hmac-sha1",
+		.cra_priority		= 100,
+		.cra_flags		= CRYPTO_ALG_TYPE_AHASH |
+						CRYPTO_ALG_ASYNC |
+						CRYPTO_ALG_NEED_FALLBACK,
+		.cra_blocksize		= SHA1_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct omap_sham_ctx) +
+					sizeof(struct omap_sham_hmac_ctx),
+		.cra_alignmask		= 0,
+		.cra_module		= THIS_MODULE,
+		.cra_init		= omap_sham_cra_sha1_init,
+		.cra_exit		= omap_sham_cra_exit,
+	}
+},
+{
+	.init		= omap_sham_init,
+	.update		= omap_sham_update,
+	.final		= omap_sham_final,
+	.finup		= omap_sham_finup,
+	.digest		= omap_sham_digest,
+	.setkey		= omap_sham_setkey,
+	.halg.digestsize	= MD5_DIGEST_SIZE,
+	.halg.base	= {
+		.cra_name		= "hmac(md5)",
+		.cra_driver_name	= "omap-hmac-md5",
+		.cra_priority		= 100,
+		.cra_flags		= CRYPTO_ALG_TYPE_AHASH |
+						CRYPTO_ALG_ASYNC |
+						CRYPTO_ALG_NEED_FALLBACK,
+		.cra_blocksize		= SHA1_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct omap_sham_ctx) +
+					sizeof(struct omap_sham_hmac_ctx),
+		.cra_alignmask		= 0,
+		.cra_module		= THIS_MODULE,
+		.cra_init		= omap_sham_cra_md5_init,
+		.cra_exit		= omap_sham_cra_exit,
+	}
+}
+};
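+
+/*
+ * Illustrative sketch, not part of this patch: a kernel user reaches
+ * these algorithms through the generic ahash API, for example
+ * (assuming a scatterlist sg of nbytes and a completion callback done):
+ *
+ *	struct crypto_ahash *tfm = crypto_alloc_ahash("sha1", 0, 0);
+ *	struct ahash_request *req = ahash_request_alloc(tfm, GFP_KERNEL);
+ *
+ *	ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ *				   done, priv);
+ *	ahash_request_set_crypt(req, sg, result, nbytes);
+ *	err = crypto_ahash_digest(req);
+ *
+ * crypto_ahash_digest() returns -EINPROGRESS once the request is queued
+ * and completes asynchronously through the callback.
+ */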
+
+static void omap_sham_done_task(unsigned long data)
+{
+	struct omap_sham_dev *dd = (struct omap_sham_dev *)data;
+	struct ahash_request *req = dd->req;
+	struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
+	int ready = 0;
+
+	if (ctx->flags & FLAGS_OUTPUT_READY) {
+		ctx->flags &= ~FLAGS_OUTPUT_READY;
+		ready = 1;
+	}
+
+	if (dd->flags & FLAGS_DMA_ACTIVE) {
+		dd->flags &= ~FLAGS_DMA_ACTIVE;
+		omap_sham_update_dma_stop(dd);
+		omap_sham_update_dma_slow(dd);
+	}
+
+	if (ready && !(dd->flags & FLAGS_DMA_ACTIVE)) {
+		dev_dbg(dd->dev, "update done\n");
+		/* finish current request */
+		omap_sham_finish_req(req, 0);
+		/* start new request */
+		omap_sham_handle_queue(dd);
+	}
+}
+
+static void omap_sham_queue_task(unsigned long data)
+{
+	struct omap_sham_dev *dd = (struct omap_sham_dev *)data;
+
+	omap_sham_handle_queue(dd);
+}
+
+static irqreturn_t omap_sham_irq(int irq, void *dev_id)
+{
+	struct omap_sham_dev *dd = dev_id;
+	struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
+
+	if (!ctx) {
+		dev_err(dd->dev, "unknown interrupt.\n");
+		return IRQ_HANDLED;
+	}
+
+	if (unlikely(ctx->flags & FLAGS_FINAL))
+		/* final -> allow device to go to power-saving mode */
+		omap_sham_write_mask(dd, SHA_REG_CTRL, 0, SHA_REG_CTRL_LENGTH);
+
+	omap_sham_write_mask(dd, SHA_REG_CTRL, SHA_REG_CTRL_OUTPUT_READY,
+				 SHA_REG_CTRL_OUTPUT_READY);
+	omap_sham_read(dd, SHA_REG_CTRL);
+
+	ctx->flags |= FLAGS_OUTPUT_READY;
+	tasklet_schedule(&dd->done_task);
+
+	return IRQ_HANDLED;
+}
+
+static void omap_sham_dma_callback(int lch, u16 ch_status, void *data)
+{
+	struct omap_sham_dev *dd = data;
+
+	if (likely(lch == dd->dma_lch))
+		tasklet_schedule(&dd->done_task);
+}
+
+static int omap_sham_dma_init(struct omap_sham_dev *dd)
+{
+	int err;
+
+	dd->dma_lch = -1;
+
+	err = omap_request_dma(dd->dma, dev_name(dd->dev),
+			omap_sham_dma_callback, dd, &dd->dma_lch);
+	if (err) {
+		dev_err(dd->dev, "Unable to request DMA channel\n");
+		return err;
+	}
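+	/* the DIN registers behave as a FIFO: keep the destination address
+	 * constant and burst the data into it */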
+	omap_set_dma_dest_params(dd->dma_lch, 0,
+			OMAP_DMA_AMODE_CONSTANT,
+			dd->phys_base + SHA_REG_DIN(0), 0, 16);
+
+	omap_set_dma_dest_burst_mode(dd->dma_lch,
+			OMAP_DMA_DATA_BURST_16);
+
+	return 0;
+}
+
+static void omap_sham_dma_cleanup(struct omap_sham_dev *dd)
+{
+	if (dd->dma_lch >= 0) {
+		omap_free_dma(dd->dma_lch);
+		dd->dma_lch = -1;
+	}
+}
+
+static int __devinit omap_sham_probe(struct platform_device *pdev)
+{
+	struct omap_sham_dev *dd;
+	struct device *dev = &pdev->dev;
+	struct resource *res;
+	int err, i, j;
+
+	dd = kzalloc(sizeof(struct omap_sham_dev), GFP_KERNEL);
+	if (dd == NULL) {
+		dev_err(dev, "unable to alloc data struct.\n");
+		err = -ENOMEM;
+		goto data_err;
+	}
+	dd->dev = dev;
+	platform_set_drvdata(pdev, dd);
+
+	INIT_LIST_HEAD(&dd->list);
+	spin_lock_init(&dd->lock);
+	tasklet_init(&dd->done_task, omap_sham_done_task, (unsigned long)dd);
+	tasklet_init(&dd->queue_task, omap_sham_queue_task, (unsigned long)dd);
+	crypto_init_queue(&dd->queue, OMAP_SHAM_QUEUE_LENGTH);
+
+	dd->irq = -1;
+
+	/* Get the base address */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(dev, "no MEM resource info\n");
+		err = -ENODEV;
+		goto res_err;
+	}
+	dd->phys_base = res->start;
+
+	/* Get the DMA */
+	res = platform_get_resource(pdev, IORESOURCE_DMA, 0);
+	if (!res) {
+		dev_err(dev, "no DMA resource info\n");
+		err = -ENODEV;
+		goto res_err;
+	}
+	dd->dma = res->start;
+
+	/* Get the IRQ */
+	dd->irq = platform_get_irq(pdev,  0);
+	if (dd->irq < 0) {
+		dev_err(dev, "no IRQ resource info\n");
+		err = dd->irq;
+		goto res_err;
+	}
+
+	err = request_irq(dd->irq, omap_sham_irq,
+			IRQF_TRIGGER_LOW, dev_name(dev), dd);
+	if (err) {
+		dev_err(dev, "unable to request irq.\n");
+		goto res_err;
+	}
+
+	err = omap_sham_dma_init(dd);
+	if (err)
+		goto dma_err;
+
+	/* Initializing the clock */
+	dd->iclk = clk_get(dev, "ick");
+	if (IS_ERR(dd->iclk)) {
+		dev_err(dev, "clock initialization failed.\n");
+		err = PTR_ERR(dd->iclk);
+		goto clk_err;
+	}
+
+	dd->io_base = ioremap(dd->phys_base, SZ_4K);
+	if (!dd->io_base) {
+		dev_err(dev, "can't ioremap\n");
+		err = -ENOMEM;
+		goto io_err;
+	}
+
+	clk_enable(dd->iclk);
+	dev_info(dev, "hw accel on OMAP rev %u.%u\n",
+		(omap_sham_read(dd, SHA_REG_REV) & SHA_REG_REV_MAJOR) >> 4,
+		omap_sham_read(dd, SHA_REG_REV) & SHA_REG_REV_MINOR);
+	clk_disable(dd->iclk);
+
+	spin_lock(&sham.lock);
+	list_add_tail(&dd->list, &sham.dev_list);
+	spin_unlock(&sham.lock);
+
+	for (i = 0; i < ARRAY_SIZE(algs); i++) {
+		err = crypto_register_ahash(&algs[i]);
+		if (err)
+			goto err_algs;
+	}
+
+	return 0;
+
+err_algs:
+	for (j = 0; j < i; j++)
+		crypto_unregister_ahash(&algs[j]);
+	iounmap(dd->io_base);
+io_err:
+	clk_put(dd->iclk);
+clk_err:
+	omap_sham_dma_cleanup(dd);
+dma_err:
+	if (dd->irq >= 0)
+		free_irq(dd->irq, dd);
+res_err:
+	kfree(dd);
+data_err:
+	dev_err(dev, "initialization failed.\n");
+
+	return err;
+}
+
+static int __devexit omap_sham_remove(struct platform_device *pdev)
+{
+	struct omap_sham_dev *dd;
+	int i;
+
+	dd = platform_get_drvdata(pdev);
+	if (!dd)
+		return -ENODEV;
+	spin_lock(&sham.lock);
+	list_del(&dd->list);
+	spin_unlock(&sham.lock);
+	for (i = 0; i < ARRAY_SIZE(algs); i++)
+		crypto_unregister_ahash(&algs[i]);
+	tasklet_kill(&dd->done_task);
+	tasklet_kill(&dd->queue_task);
+	iounmap(dd->io_base);
+	clk_put(dd->iclk);
+	omap_sham_dma_cleanup(dd);
+	if (dd->irq >= 0)
+		free_irq(dd->irq, dd);
+	kfree(dd);
+
+	return 0;
+}
+
+static struct platform_driver omap_sham_driver = {
+	.probe	= omap_sham_probe,
+	.remove	= omap_sham_remove,
+	.driver	= {
+		.name	= "omap-sham",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static int __init omap_sham_mod_init(void)
+{
+	pr_info("loading %s driver\n", "omap-sham");
+
+	if (!cpu_class_is_omap2() ||
+	    omap_type() != OMAP2_DEVICE_TYPE_SEC) {
+		pr_err("Unsupported cpu\n");
+		return -ENODEV;
+	}
+
+	return platform_driver_register(&omap_sham_driver);
+}
+
+static void __exit omap_sham_mod_exit(void)
+{
+	platform_driver_unregister(&omap_sham_driver);
+}
+
+module_init(omap_sham_mod_init);
+module_exit(omap_sham_mod_exit);
+
+MODULE_DESCRIPTION("OMAP SHA1/MD5 hw acceleration support.");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Dmitry Kasatkin");
diff --git a/kernel/padata.c b/kernel/padata.c
index fd03513..5b44d0f 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -29,7 +29,7 @@
 #include <linux/rcupdate.h>
 
 #define MAX_SEQ_NR INT_MAX - NR_CPUS
-#define MAX_OBJ_NUM 10000 * NR_CPUS
+#define MAX_OBJ_NUM 1000
 
 static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
 {
@@ -570,8 +570,8 @@
 }
 EXPORT_SYMBOL(padata_stop);
 
-static int __cpuinit padata_cpu_callback(struct notifier_block *nfb,
-					 unsigned long action, void *hcpu)
+static int padata_cpu_callback(struct notifier_block *nfb,
+			       unsigned long action, void *hcpu)
 {
 	int err;
 	struct padata_instance *pinst;