Merge branch 'for-next' of git://gitorious.org/usb/usb into usb-next

* 'for-next' of git://gitorious.org/usb/usb: (47 commits)
  usb: musb: Enable DMA mode1 RX for transfers without short packets
  usb: musb: fix build breakage
  usb: gadget: audio: queue wLength-sized requests
  usb: gadget: audio: actually support both speeds
  usb: gadget: storage: make FSG_NUM_BUFFERS variable size
  USB: gadget: storage: remove alignment assumption
  usb: gadget: storage: adapt logic block size to bound block devices
  usb: dwc3: gadget: improve debug on link state change
  usb: dwc3: omap: set idle and standby modes
  usb: dwc3: ep0: introduce ep0_expect_in flag
  usb: dwc3: ep0: giveback requests on stall_and_restart
  usb: dwc3: gadget: drop the useless dma_sync_single* calls
  usb: dwc3: gadget: fix GCTL programming
  usb: dwc3: define ScaleDown macro helper
  usb: dwc3: Fix definition of DWC3_GCTL_U2RSTECN
  usb: dwc3: gadget: do not map/unmap ZLP transfers
  usb: dwc3: omap: fix IRQ handling
  usb: dwc3: omap: change IRQ name to dwc3-omap
  usb: dwc3: add module.h to dwc3-omap.c and core.c
  usb: dwc3: omap: distinguish between SW and HW modes
  ...
diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile
index f343365..7317a2b 100644
--- a/arch/arm/mach-omap2/Makefile
+++ b/arch/arm/mach-omap2/Makefile
@@ -242,14 +242,11 @@
 obj-$(CONFIG_MACH_OMAP3_TOUCHBOOK)	+= board-omap3touchbook.o \
 					   hsmmc.o
 obj-$(CONFIG_MACH_OMAP_4430SDP)		+= board-4430sdp.o \
-					   hsmmc.o \
-					   omap_phy_internal.o
+					   hsmmc.o
 obj-$(CONFIG_MACH_OMAP4_PANDA)		+= board-omap4panda.o \
-					   hsmmc.o \
-					   omap_phy_internal.o
+					   hsmmc.o
 
-obj-$(CONFIG_MACH_OMAP3517EVM)		+= board-am3517evm.o \
-					   omap_phy_internal.o \
+obj-$(CONFIG_MACH_OMAP3517EVM)		+= board-am3517evm.o
 
 obj-$(CONFIG_MACH_CRANEBOARD)		+= board-am3517crane.o
 
@@ -260,6 +257,8 @@
 usbfs-$(CONFIG_ARCH_OMAP_OTG)		:= usb-fs.o
 obj-y					+= $(usbfs-m) $(usbfs-y)
 obj-y					+= usb-musb.o
+obj-y					+= omap_phy_internal.o
+
 obj-$(CONFIG_MACH_OMAP2_TUSB6010)	+= usb-tusb6010.o
 obj-y					+= usb-host.o
 
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 443e4fb..64ba097 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -37,6 +37,7 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
@@ -241,6 +242,15 @@
 	u32			reg;
 	int			ret;
 
+	reg = dwc3_readl(dwc->regs, DWC3_GSNPSID);
+	/* This should read as U3 followed by revision number */
+	if ((reg & DWC3_GSNPSID_MASK) != 0x55330000) {
+		dev_err(dwc->dev, "this is not a DesignWare USB3 DRD Core\n");
+		ret = -ENODEV;
+		goto err0;
+	}
+	dwc->revision = reg & DWC3_GSNPSREV_MASK;
+
 	dwc3_core_soft_reset(dwc);
 
 	/* issue device SoftReset too */
@@ -260,16 +270,6 @@
 		cpu_relax();
 	} while (true);
 
-	reg = dwc3_readl(dwc->regs, DWC3_GSNPSID);
-	/* This should read as U3 followed by revision number */
-	if ((reg & DWC3_GSNPSID_MASK) != 0x55330000) {
-		dev_err(dwc->dev, "this is not a DesignWare USB3 DRD Core\n");
-		ret = -ENODEV;
-		goto err0;
-	}
-
-	dwc->revision = reg & DWC3_GSNPSREV_MASK;
-
 	ret = dwc3_alloc_event_buffers(dwc, DWC3_EVENT_BUFFERS_NUM,
 			DWC3_EVENT_BUFFERS_SIZE);
 	if (ret) {
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index 83b2960..07d2018 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -147,7 +147,7 @@
 
 /* Global Configuration Register */
 #define DWC3_GCTL_PWRDNSCALE(n)	(n << 19)
-#define DWC3_GCTL_U2RSTECN	16
+#define DWC3_GCTL_U2RSTECN	(1 << 16)
 #define DWC3_GCTL_RAMCLKSEL(x)	((x & DWC3_GCTL_CLK_MASK) << 6)
 #define DWC3_GCTL_CLK_BUS	(0)
 #define DWC3_GCTL_CLK_PIPE	(1)
@@ -160,6 +160,7 @@
 #define DWC3_GCTL_PRTCAP_OTG	3
 
 #define DWC3_GCTL_CORESOFTRESET	(1 << 11)
+#define DWC3_GCTL_SCALEDOWN(n)	(n << 4)
 #define DWC3_GCTL_DISSCRAMBLE	(1 << 3)
 
 /* Global USB2 PHY Configuration Register */
@@ -348,7 +349,9 @@
 #define DWC3_EP_WEDGE		(1 << 2)
 #define DWC3_EP_BUSY		(1 << 4)
 #define DWC3_EP_PENDING_REQUEST	(1 << 5)
-#define DWC3_EP_WILL_SHUTDOWN	(1 << 6)
+
+	/* This last one is specific to EP0 */
+#define DWC3_EP0_DIR_IN		(1 << 31)
 
 	unsigned		current_trb;
 
@@ -368,18 +371,19 @@
 	DWC3_PHY_USB2,
 };
 
+enum dwc3_ep0_next {
+	DWC3_EP0_UNKNOWN = 0,
+	DWC3_EP0_COMPLETE,
+	DWC3_EP0_NRDY_SETUP,
+	DWC3_EP0_NRDY_DATA,
+	DWC3_EP0_NRDY_STATUS,
+};
+
 enum dwc3_ep0_state {
 	EP0_UNCONNECTED		= 0,
-	EP0_IDLE,
-	EP0_IN_DATA_PHASE,
-	EP0_OUT_DATA_PHASE,
-	EP0_IN_WAIT_GADGET,
-	EP0_OUT_WAIT_GADGET,
-	EP0_IN_WAIT_NRDY,
-	EP0_OUT_WAIT_NRDY,
-	EP0_IN_STATUS_PHASE,
-	EP0_OUT_STATUS_PHASE,
-	EP0_STALL,
+	EP0_SETUP_PHASE,
+	EP0_DATA_PHASE,
+	EP0_STATUS_PHASE,
 };
 
 enum dwc3_link_state {
@@ -503,13 +507,15 @@
 
 /**
  * struct dwc3 - representation of our controller
- * ctrl_req: usb control request which is used for ep0
- * ep0_trb: trb which is used for the ctrl_req
- * setup_buf: used while precessing STD USB requests
- * ctrl_req_addr: dma address of ctrl_req
- * ep0_trb: dma address of ep0_trb
- * ep0_usb_req: dummy req used while handling STD USB requests
- * setup_buf_addr: dma address of setup_buf
+ * @ctrl_req: usb control request which is used for ep0
+ * @ep0_trb: trb which is used for the ctrl_req
+ * @ep0_bounce: bounce buffer for ep0
+ * @setup_buf: used while precessing STD USB requests
+ * @ctrl_req_addr: dma address of ctrl_req
+ * @ep0_trb: dma address of ep0_trb
+ * @ep0_usb_req: dummy req used while handling STD USB requests
+ * @setup_buf_addr: dma address of setup_buf
+ * @ep0_bounce_addr: dma address of ep0_bounce
  * @lock: for synchronizing
  * @dev: pointer to our struct device
  * @event_buffer_list: a list of event buffers
@@ -522,6 +528,9 @@
  * @is_selfpowered: true when we are selfpowered
  * @three_stage_setup: set if we perform a three phase setup
  * @ep0_status_pending: ep0 status response without a req is pending
+ * @ep0_bounced: true when we used bounce buffer
+ * @ep0_expect_in: true when we expect a DATA IN transfer
+ * @ep0_next_event: hold the next expected event
  * @ep0state: state of endpoint zero
  * @link_state: link state
  * @speed: device speed (super, high, full, low)
@@ -531,10 +540,12 @@
 struct dwc3 {
 	struct usb_ctrlrequest	*ctrl_req;
 	struct dwc3_trb_hw	*ep0_trb;
+	void			*ep0_bounce;
 	u8			*setup_buf;
 	dma_addr_t		ctrl_req_addr;
 	dma_addr_t		ep0_trb_addr;
 	dma_addr_t		setup_buf_addr;
+	dma_addr_t		ep0_bounce_addr;
 	struct usb_request	ep0_usb_req;
 	/* device lock */
 	spinlock_t		lock;
@@ -564,7 +575,10 @@
 	unsigned		is_selfpowered:1;
 	unsigned		three_stage_setup:1;
 	unsigned		ep0_status_pending:1;
+	unsigned		ep0_bounced:1;
+	unsigned		ep0_expect_in:1;
 
+	enum dwc3_ep0_next	ep0_next_event;
 	enum dwc3_ep0_state	ep0state;
 	enum dwc3_link_state	link_state;
 	enum dwc3_device_state	dev_state;
@@ -634,6 +648,12 @@
 #define DEPEVT_STATUS_SHORT     (1 << 1)
 #define DEPEVT_STATUS_IOC       (1 << 2)
 #define DEPEVT_STATUS_LST	(1 << 3)
+
+/* Control-only Status */
+#define DEPEVT_STATUS_CONTROL_SETUP	0
+#define DEPEVT_STATUS_CONTROL_DATA	1
+#define DEPEVT_STATUS_CONTROL_STATUS	2
+
 	u32	parameters:16;
 } __packed;
 
diff --git a/drivers/usb/dwc3/debugfs.c b/drivers/usb/dwc3/debugfs.c
index 432df53..20d329f 100644
--- a/drivers/usb/dwc3/debugfs.c
+++ b/drivers/usb/dwc3/debugfs.c
@@ -406,91 +406,6 @@
 	.release		= single_release,
 };
 
-
-static int dwc3_send_testmode_cmd(struct dwc3 *dwc, int mode)
-{
-	u32 timeout = 250;
-
-	dwc3_writel(dwc->regs, DWC3_DGCMDPAR, mode);
-	dwc3_writel(dwc->regs, DWC3_DGCMD, DWC3_DGCMD_RUN_SOC_BUS_LOOPBACK |
-			DWC3_DEPCMD_CMDACT);
-	do {
-		u32 reg;
-
-		reg = dwc3_readl(dwc->regs, DWC3_DGCMD);
-		if (!(reg & DWC3_DEPCMD_CMDACT))
-			return 0;
-		timeout--;
-		if (!timeout)
-			return -ETIMEDOUT;
-		mdelay(1);
-	} while (1);
-}
-
-static struct dwc3_trb_hw trb_0 __aligned(16);
-static struct dwc3_trb_hw trb_1 __aligned(16);
-
-#define BUF_SIZE	4096
-static int dwc3_testmode_open(struct inode *inode, struct file *file)
-{
-	struct dwc3 *dwc = inode->i_private;
-	struct dwc3_gadget_ep_cmd_params par0;
-	struct dwc3_gadget_ep_cmd_params par1;
-	struct dwc3_trb         trb;
-	int ret;
-	u8 *buf0;
-	u8 *buf1;
-
-	buf0 = kmalloc(BUF_SIZE, GFP_KERNEL);
-	if (!buf0)
-		return -ENOMEM;
-	buf1 = kmalloc(BUF_SIZE, GFP_KERNEL);
-	if (!buf1)
-		return -ENOMEM;
-
-	memset(buf0, 0xaa, BUF_SIZE);
-	memset(buf1, 0x33, BUF_SIZE);
-
-	memset(&trb, 0, sizeof(trb));
-	memset(&par0, 0, sizeof(par0));
-	memset(&par1, 0, sizeof(par1));
-
-	trb.lst = 1;
-	trb.trbctl = DWC3_TRBCTL_NORMAL;
-	trb.length = BUF_SIZE;
-	trb.hwo = 1;
-
-	trb.bplh = virt_to_phys(buf0);
-	dwc3_trb_to_hw(&trb, &trb_0);
-
-	trb.bplh = virt_to_phys(buf1);
-	dwc3_trb_to_hw(&trb, &trb_1);
-
-	par0.param0.depstrtxfer.transfer_desc_addr_high =
-		upper_32_bits(virt_to_phys(&trb_0));
-	par0.param1.depstrtxfer.transfer_desc_addr_low =
-		lower_32_bits(virt_to_phys(&trb_0));
-
-	par1.param0.depstrtxfer.transfer_desc_addr_high =
-		upper_32_bits(virt_to_phys(&trb_1));
-	par1.param1.depstrtxfer.transfer_desc_addr_low =
-		lower_32_bits(virt_to_phys(&trb_1));
-
-	dwc3_send_testmode_cmd(dwc, 1);
-
-	ret = dwc3_send_gadget_ep_cmd(dwc, 0, DWC3_DEPCMD_STARTTRANSFER, &par0);
-	ret = dwc3_send_gadget_ep_cmd(dwc, 1, DWC3_DEPCMD_STARTTRANSFER, &par1);
-
-	dwc3_send_testmode_cmd(dwc, 0);
-	return -EBUSY;
-}
-
-static const struct file_operations dwc3_testmode_fops = {
-	.open			= dwc3_testmode_open,
-	.read			= seq_read,
-	.release		= single_release,
-};
-
 int __devinit dwc3_debugfs_init(struct dwc3 *dwc)
 {
 	struct dentry		*root;
@@ -511,13 +426,6 @@
 		ret = PTR_ERR(file);
 		goto err1;
 	}
-	file = debugfs_create_file("testmode", S_IRUGO, root, dwc,
-			&dwc3_testmode_fops);
-	if (IS_ERR(file)) {
-		ret = PTR_ERR(file);
-		goto err1;
-	}
-
 	return 0;
 
 err1:
diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c
index 08fffe6..72cc92b 100644
--- a/drivers/usb/dwc3/dwc3-omap.c
+++ b/drivers/usb/dwc3/dwc3-omap.c
@@ -37,11 +37,13 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/interrupt.h>
 #include <linux/spinlock.h>
 #include <linux/platform_device.h>
+#include <linux/platform_data/dwc3-omap.h>
 #include <linux/dma-mapping.h>
 #include <linux/ioport.h>
 #include <linux/io.h>
@@ -74,8 +76,23 @@
 /* SYSCONFIG REGISTER */
 #define USBOTGSS_SYSCONFIG_DMADISABLE		(1 << 16)
 #define USBOTGSS_SYSCONFIG_STANDBYMODE(x)	((x) << 4)
+
+#define USBOTGSS_STANDBYMODE_FORCE_STANDBY	0
+#define USBOTGSS_STANDBYMODE_NO_STANDBY		1
+#define USBOTGSS_STANDBYMODE_SMART_STANDBY	2
+#define USBOTGSS_STANDBYMODE_SMART_WAKEUP	3
+
+#define USBOTGSS_STANDBYMODE_MASK		(0x03 << 4)
+
 #define USBOTGSS_SYSCONFIG_IDLEMODE(x)		((x) << 2)
 
+#define USBOTGSS_IDLEMODE_FORCE_IDLE		0
+#define USBOTGSS_IDLEMODE_NO_IDLE		1
+#define USBOTGSS_IDLEMODE_SMART_IDLE		2
+#define USBOTGSS_IDLEMODE_SMART_WAKEUP		3
+
+#define USBOTGSS_IDLEMODE_MASK			(0x03 << 2)
+
 /* IRQ_EOI REGISTER */
 #define USBOTGSS_IRQ_EOI_LINE_NUMBER		(1 << 0)
 
@@ -125,106 +142,51 @@
 	u32			dma_status:1;
 };
 
-#ifdef CONFIG_PM
-static int dwc3_omap_suspend(struct device *dev)
-{
-	struct dwc3_omap	*omap = dev_get_drvdata(dev);
-
-	memcpy_fromio(omap->context, omap->base, omap->resource_size);
-
-	return 0;
-}
-
-static int dwc3_omap_resume(struct device *dev)
-{
-	struct dwc3_omap	*omap = dev_get_drvdata(dev);
-
-	memcpy_toio(omap->base, omap->context, omap->resource_size);
-
-	return 0;
-}
-
-static int dwc3_omap_idle(struct device *dev)
-{
-	struct dwc3_omap	*omap = dev_get_drvdata(dev);
-	u32			reg;
-
-	/* stop DMA Engine */
-	reg = dwc3_readl(omap->base, USBOTGSS_SYSCONFIG);
-	reg &= ~(USBOTGSS_SYSCONFIG_DMADISABLE);
-	dwc3_writel(omap->base, USBOTGSS_SYSCONFIG, reg);
-
-	return 0;
-}
-
-static UNIVERSAL_DEV_PM_OPS(dwc3_omap_pm_ops, dwc3_omap_suspend,
-		dwc3_omap_resume, dwc3_omap_idle);
-
-#define DEV_PM_OPS	(&dwc3_omap_pm_ops)
-#else
-#define DEV_PM_OPS	NULL
-#endif
-
 static irqreturn_t dwc3_omap_interrupt(int irq, void *_omap)
 {
 	struct dwc3_omap	*omap = _omap;
 	u32			reg;
-	u32			ctrl;
 
 	spin_lock(&omap->lock);
 
 	reg = dwc3_readl(omap->base, USBOTGSS_IRQSTATUS_1);
-	ctrl = dwc3_readl(omap->base, USBOTGSS_UTMI_OTG_CTRL);
 
 	if (reg & USBOTGSS_IRQ1_DMADISABLECLR) {
-		dev_dbg(omap->base, "DMA Disable was Cleared\n");
+		dev_dbg(omap->dev, "DMA Disable was Cleared\n");
 		omap->dma_status = false;
 	}
 
 	if (reg & USBOTGSS_IRQ1_OEVT)
-		dev_dbg(omap->base, "OTG Event\n");
+		dev_dbg(omap->dev, "OTG Event\n");
 
-	if (reg & USBOTGSS_IRQ1_DRVVBUS_RISE) {
-		dev_dbg(omap->base, "DRVVBUS Rise\n");
-		ctrl |= USBOTGSS_UTMI_OTG_CTRL_DRVVBUS;
-	}
+	if (reg & USBOTGSS_IRQ1_DRVVBUS_RISE)
+		dev_dbg(omap->dev, "DRVVBUS Rise\n");
 
-	if (reg & USBOTGSS_IRQ1_CHRGVBUS_RISE) {
-		dev_dbg(omap->base, "CHRGVBUS Rise\n");
-		ctrl |= USBOTGSS_UTMI_OTG_CTRL_CHRGVBUS;
-	}
+	if (reg & USBOTGSS_IRQ1_CHRGVBUS_RISE)
+		dev_dbg(omap->dev, "CHRGVBUS Rise\n");
 
-	if (reg & USBOTGSS_IRQ1_DISCHRGVBUS_RISE) {
-		dev_dbg(omap->base, "DISCHRGVBUS Rise\n");
-		ctrl |= USBOTGSS_UTMI_OTG_CTRL_DISCHRGVBUS;
-	}
+	if (reg & USBOTGSS_IRQ1_DISCHRGVBUS_RISE)
+		dev_dbg(omap->dev, "DISCHRGVBUS Rise\n");
 
-	if (reg & USBOTGSS_IRQ1_IDPULLUP_RISE) {
-		dev_dbg(omap->base, "IDPULLUP Rise\n");
-		ctrl |= USBOTGSS_UTMI_OTG_CTRL_IDPULLUP;
-	}
+	if (reg & USBOTGSS_IRQ1_IDPULLUP_RISE)
+		dev_dbg(omap->dev, "IDPULLUP Rise\n");
 
-	if (reg & USBOTGSS_IRQ1_DRVVBUS_FALL) {
-		dev_dbg(omap->base, "DRVVBUS Fall\n");
-		ctrl &= ~USBOTGSS_UTMI_OTG_CTRL_DRVVBUS;
-	}
+	if (reg & USBOTGSS_IRQ1_DRVVBUS_FALL)
+		dev_dbg(omap->dev, "DRVVBUS Fall\n");
 
-	if (reg & USBOTGSS_IRQ1_CHRGVBUS_FALL) {
-		dev_dbg(omap->base, "CHRGVBUS Fall\n");
-		ctrl &= ~USBOTGSS_UTMI_OTG_CTRL_CHRGVBUS;
-	}
+	if (reg & USBOTGSS_IRQ1_CHRGVBUS_FALL)
+		dev_dbg(omap->dev, "CHRGVBUS Fall\n");
 
-	if (reg & USBOTGSS_IRQ1_DISCHRGVBUS_FALL) {
-		dev_dbg(omap->base, "DISCHRGVBUS Fall\n");
-		ctrl &= ~USBOTGSS_UTMI_OTG_CTRL_DISCHRGVBUS;
-	}
+	if (reg & USBOTGSS_IRQ1_DISCHRGVBUS_FALL)
+		dev_dbg(omap->dev, "DISCHRGVBUS Fall\n");
 
-	if (reg & USBOTGSS_IRQ1_IDPULLUP_FALL) {
-		dev_dbg(omap->base, "IDPULLUP Fall\n");
-		ctrl &= ~USBOTGSS_UTMI_OTG_CTRL_IDPULLUP;
-	}
+	if (reg & USBOTGSS_IRQ1_IDPULLUP_FALL)
+		dev_dbg(omap->dev, "IDPULLUP Fall\n");
 
-	dwc3_writel(omap->base, USBOTGSS_UTMI_OTG_CTRL, ctrl);
+	dwc3_writel(omap->base, USBOTGSS_IRQSTATUS_1, reg);
+
+	reg = dwc3_readl(omap->base, USBOTGSS_IRQSTATUS_0);
+	dwc3_writel(omap->base, USBOTGSS_IRQSTATUS_0, reg);
 
 	spin_unlock(&omap->lock);
 
@@ -233,6 +195,7 @@
 
 static int __devinit dwc3_omap_probe(struct platform_device *pdev)
 {
+	struct dwc3_omap_data	*pdata = pdev->dev.platform_data;
 	struct platform_device	*dwc3;
 	struct dwc3_omap	*omap;
 	struct resource		*res;
@@ -298,12 +261,41 @@
 	omap->base	= base;
 	omap->dwc3	= dwc3;
 
+	reg = dwc3_readl(omap->base, USBOTGSS_UTMI_OTG_STATUS);
+
+	if (!pdata) {
+		dev_dbg(&pdev->dev, "missing platform data\n");
+	} else {
+		switch (pdata->utmi_mode) {
+		case DWC3_OMAP_UTMI_MODE_SW:
+			reg |= USBOTGSS_UTMI_OTG_STATUS_SW_MODE;
+			break;
+		case DWC3_OMAP_UTMI_MODE_HW:
+			reg &= ~USBOTGSS_UTMI_OTG_STATUS_SW_MODE;
+			break;
+		default:
+			dev_dbg(&pdev->dev, "UNKNOWN utmi mode %d\n",
+					pdata->utmi_mode);
+		}
+	}
+
+	dwc3_writel(omap->base, USBOTGSS_UTMI_OTG_STATUS, reg);
+
 	/* check the DMA Status */
 	reg = dwc3_readl(omap->base, USBOTGSS_SYSCONFIG);
 	omap->dma_status = !!(reg & USBOTGSS_SYSCONFIG_DMADISABLE);
 
+	/* Set No-Idle and No-Standby */
+	reg &= ~(USBOTGSS_STANDBYMODE_MASK
+			| USBOTGSS_IDLEMODE_MASK);
+
+	reg |= (USBOTGSS_SYSCONFIG_STANDBYMODE(USBOTGSS_STANDBYMODE_NO_STANDBY)
+		| USBOTGSS_SYSCONFIG_IDLEMODE(USBOTGSS_IDLEMODE_NO_IDLE));
+
+	dwc3_writel(omap->base, USBOTGSS_SYSCONFIG, reg);
+
 	ret = request_irq(omap->irq, dwc3_omap_interrupt, 0,
-			"dwc3-wrapper", omap);
+			"dwc3-omap", omap);
 	if (ret) {
 		dev_err(&pdev->dev, "failed to request IRQ #%d --> %d\n",
 				omap->irq, ret);
@@ -311,10 +303,10 @@
 	}
 
 	/* enable all IRQs */
-	dwc3_writel(omap->base, USBOTGSS_IRQENABLE_SET_0, 0x01);
+	reg = USBOTGSS_IRQO_COREIRQ_ST;
+	dwc3_writel(omap->base, USBOTGSS_IRQENABLE_SET_0, reg);
 
-	reg = (USBOTGSS_IRQ1_DMADISABLECLR |
-			USBOTGSS_IRQ1_OEVT |
+	reg = (USBOTGSS_IRQ1_OEVT |
 			USBOTGSS_IRQ1_DRVVBUS_RISE |
 			USBOTGSS_IRQ1_CHRGVBUS_RISE |
 			USBOTGSS_IRQ1_DISCHRGVBUS_RISE |
@@ -388,7 +380,6 @@
 	.remove		= __devexit_p(dwc3_omap_remove),
 	.driver		= {
 		.name	= "omap-dwc3",
-		.pm	= DEV_PM_OPS,
 		.of_match_table	= of_dwc3_matach,
 	},
 };
diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c
index 4698fe0..b66d969 100644
--- a/drivers/usb/dwc3/ep0.c
+++ b/drivers/usb/dwc3/ep0.c
@@ -62,33 +62,19 @@
 	switch (state) {
 	case EP0_UNCONNECTED:
 		return "Unconnected";
-	case EP0_IDLE:
-		return "Idle";
-	case EP0_IN_DATA_PHASE:
-		return "IN Data Phase";
-	case EP0_OUT_DATA_PHASE:
-		return "OUT Data Phase";
-	case EP0_IN_WAIT_GADGET:
-		return "IN Wait Gadget";
-	case EP0_OUT_WAIT_GADGET:
-		return "OUT Wait Gadget";
-	case EP0_IN_WAIT_NRDY:
-		return "IN Wait NRDY";
-	case EP0_OUT_WAIT_NRDY:
-		return "OUT Wait NRDY";
-	case EP0_IN_STATUS_PHASE:
-		return "IN Status Phase";
-	case EP0_OUT_STATUS_PHASE:
-		return "OUT Status Phase";
-	case EP0_STALL:
-		return "Stall";
+	case EP0_SETUP_PHASE:
+		return "Setup Phase";
+	case EP0_DATA_PHASE:
+		return "Data Phase";
+	case EP0_STATUS_PHASE:
+		return "Status Phase";
 	default:
 		return "UNKNOWN";
 	}
 }
 
 static int dwc3_ep0_start_trans(struct dwc3 *dwc, u8 epnum, dma_addr_t buf_dma,
-		u32 len)
+		u32 len, u32 type)
 {
 	struct dwc3_gadget_ep_cmd_params params;
 	struct dwc3_trb_hw		*trb_hw;
@@ -98,52 +84,15 @@
 	int				ret;
 
 	dep = dwc->eps[epnum];
+	if (dep->flags & DWC3_EP_BUSY) {
+		dev_vdbg(dwc->dev, "%s: still busy\n", dep->name);
+		return 0;
+	}
 
 	trb_hw = dwc->ep0_trb;
 	memset(&trb, 0, sizeof(trb));
 
-	switch (dwc->ep0state) {
-	case EP0_IDLE:
-		trb.trbctl = DWC3_TRBCTL_CONTROL_SETUP;
-		break;
-
-	case EP0_IN_WAIT_NRDY:
-	case EP0_OUT_WAIT_NRDY:
-	case EP0_IN_STATUS_PHASE:
-	case EP0_OUT_STATUS_PHASE:
-		if (dwc->three_stage_setup)
-			trb.trbctl = DWC3_TRBCTL_CONTROL_STATUS3;
-		else
-			trb.trbctl = DWC3_TRBCTL_CONTROL_STATUS2;
-
-		if (dwc->ep0state == EP0_IN_WAIT_NRDY)
-			dwc->ep0state = EP0_IN_STATUS_PHASE;
-		else if (dwc->ep0state == EP0_OUT_WAIT_NRDY)
-			dwc->ep0state = EP0_OUT_STATUS_PHASE;
-		break;
-
-	case EP0_IN_WAIT_GADGET:
-		dwc->ep0state = EP0_IN_WAIT_NRDY;
-		return 0;
-		break;
-
-	case EP0_OUT_WAIT_GADGET:
-		dwc->ep0state = EP0_OUT_WAIT_NRDY;
-		return 0;
-
-		break;
-
-	case EP0_IN_DATA_PHASE:
-	case EP0_OUT_DATA_PHASE:
-		trb.trbctl = DWC3_TRBCTL_CONTROL_DATA;
-		break;
-
-	default:
-		dev_err(dwc->dev, "%s() can't in state %d\n", __func__,
-				dwc->ep0state);
-		return -EINVAL;
-	}
-
+	trb.trbctl = type;
 	trb.bplh = buf_dma;
 	trb.length = len;
 
@@ -167,31 +116,58 @@
 		return ret;
 	}
 
+	dep->flags |= DWC3_EP_BUSY;
 	dep->res_trans_idx = dwc3_gadget_ep_get_transfer_index(dwc,
 			dep->number);
 
+	dwc->ep0_next_event = DWC3_EP0_COMPLETE;
+
 	return 0;
 }
 
 static int __dwc3_gadget_ep0_queue(struct dwc3_ep *dep,
 		struct dwc3_request *req)
 {
-	struct dwc3		*dwc = dep->dwc;
-	int			ret;
+	int			ret = 0;
 
 	req->request.actual	= 0;
 	req->request.status	= -EINPROGRESS;
-	req->direction		= dep->direction;
 	req->epnum		= dep->number;
 
 	list_add_tail(&req->list, &dep->request_list);
-	dwc3_map_buffer_to_dma(req);
 
-	ret = dwc3_ep0_start_trans(dwc, dep->number, req->request.dma,
-			req->request.length);
-	if (ret < 0) {
-		list_del(&req->list);
-		dwc3_unmap_buffer_from_dma(req);
+	/*
+	 * Gadget driver might not be quick enough to queue a request
+	 * before we get a Transfer Not Ready event on this endpoint.
+	 *
+	 * In that case, we will set DWC3_EP_PENDING_REQUEST. When that
+	 * flag is set, it's telling us that as soon as Gadget queues the
+	 * required request, we should kick the transfer here because the
+	 * IRQ we were waiting for is long gone.
+	 */
+	if (dep->flags & DWC3_EP_PENDING_REQUEST) {
+		struct dwc3	*dwc = dep->dwc;
+		unsigned	direction;
+		u32		type;
+
+		direction = !!(dep->flags & DWC3_EP0_DIR_IN);
+
+		if (dwc->ep0state == EP0_STATUS_PHASE) {
+			type = dwc->three_stage_setup
+				? DWC3_TRBCTL_CONTROL_STATUS3
+				: DWC3_TRBCTL_CONTROL_STATUS2;
+		} else if (dwc->ep0state == EP0_DATA_PHASE) {
+			type = DWC3_TRBCTL_CONTROL_DATA;
+		} else {
+			/* should never happen */
+			WARN_ON(1);
+			return 0;
+		}
+
+		ret = dwc3_ep0_start_trans(dwc, direction,
+				req->request.dma, req->request.length, type);
+		dep->flags &= ~(DWC3_EP_PENDING_REQUEST |
+				DWC3_EP0_DIR_IN);
 	}
 
 	return ret;
@@ -208,24 +184,6 @@
 
 	int				ret;
 
-	switch (dwc->ep0state) {
-	case EP0_IN_DATA_PHASE:
-	case EP0_IN_WAIT_GADGET:
-	case EP0_IN_WAIT_NRDY:
-	case EP0_IN_STATUS_PHASE:
-		dep = dwc->eps[1];
-		break;
-
-	case EP0_OUT_DATA_PHASE:
-	case EP0_OUT_WAIT_GADGET:
-	case EP0_OUT_WAIT_NRDY:
-	case EP0_OUT_STATUS_PHASE:
-		dep = dwc->eps[0];
-		break;
-	default:
-		return -EINVAL;
-	}
-
 	spin_lock_irqsave(&dwc->lock, flags);
 	if (!dep->desc) {
 		dev_dbg(dwc->dev, "trying to queue request %p to disabled %s\n",
@@ -256,53 +214,32 @@
 
 static void dwc3_ep0_stall_and_restart(struct dwc3 *dwc)
 {
+	struct dwc3_ep		*dep = dwc->eps[0];
+
 	/* stall is always issued on EP0 */
 	__dwc3_gadget_ep_set_halt(dwc->eps[0], 1);
-	dwc->eps[0]->flags &= ~DWC3_EP_STALL;
-	dwc->ep0state = EP0_IDLE;
+	dwc->eps[0]->flags = DWC3_EP_ENABLED;
+
+	if (!list_empty(&dep->request_list)) {
+		struct dwc3_request	*req;
+
+		req = next_request(&dep->request_list);
+		dwc3_gadget_giveback(dep, req, -ECONNRESET);
+	}
+
+	dwc->ep0state = EP0_SETUP_PHASE;
 	dwc3_ep0_out_start(dwc);
 }
 
 void dwc3_ep0_out_start(struct dwc3 *dwc)
 {
-	struct dwc3_ep			*dep;
 	int				ret;
 
-	dep = dwc->eps[0];
-
-	ret = dwc3_ep0_start_trans(dwc, 0, dwc->ctrl_req_addr, 8);
+	ret = dwc3_ep0_start_trans(dwc, 0, dwc->ctrl_req_addr, 8,
+			DWC3_TRBCTL_CONTROL_SETUP);
 	WARN_ON(ret < 0);
 }
 
-/*
- * Send a zero length packet for the status phase of the control transfer
- */
-static void dwc3_ep0_do_setup_status(struct dwc3 *dwc,
-		const struct dwc3_event_depevt *event)
-{
-	struct dwc3_ep			*dep;
-	int				ret;
-	u32				epnum;
-
-	epnum = event->endpoint_number;
-	dep = dwc->eps[epnum];
-
-	if (epnum)
-		dwc->ep0state = EP0_IN_STATUS_PHASE;
-	else
-		dwc->ep0state = EP0_OUT_STATUS_PHASE;
-
-	/*
-	 * Not sure Why I need a buffer for a zero transfer. Maybe the
-	 * HW reacts strange on a NULL pointer
-	 */
-	ret = dwc3_ep0_start_trans(dwc, epnum, dwc->ctrl_req_addr, 0);
-	if (ret) {
-		dev_dbg(dwc->dev, "failed to start transfer, stalling\n");
-		dwc3_ep0_stall_and_restart(dwc);
-	}
-}
-
 static struct dwc3_ep *dwc3_wIndex_to_dep(struct dwc3 *dwc, __le16 wIndex_le)
 {
 	struct dwc3_ep		*dep;
@@ -322,16 +259,9 @@
 
 static void dwc3_ep0_send_status_response(struct dwc3 *dwc)
 {
-	u32 epnum;
-
-	if (dwc->ep0state == EP0_IN_DATA_PHASE)
-		epnum = 1;
-	else
-		epnum = 0;
-
-	dwc3_ep0_start_trans(dwc, epnum, dwc->ctrl_req_addr,
-			dwc->ep0_usb_req.length);
-	dwc->ep0_status_pending = 1;
+	dwc3_ep0_start_trans(dwc, 1, dwc->setup_buf_addr,
+			dwc->ep0_usb_req.length,
+			DWC3_TRBCTL_CONTROL_DATA);
 }
 
 /*
@@ -376,7 +306,7 @@
 	response_pkt = (__le16 *) dwc->setup_buf;
 	*response_pkt = cpu_to_le16(usb_status);
 	dwc->ep0_usb_req.length = sizeof(*response_pkt);
-	dwc3_ep0_send_status_response(dwc);
+	dwc->ep0_status_pending = 1;
 
 	return 0;
 }
@@ -486,8 +416,6 @@
 		return -EINVAL;
 	};
 
-	dwc->ep0state = EP0_IN_WAIT_NRDY;
-
 	return 0;
 }
 
@@ -522,7 +450,7 @@
 		ret = -EINVAL;
 		break;
 	}
-	dwc->ep0state = EP0_IN_WAIT_NRDY;
+
 	return ret;
 }
 
@@ -610,16 +538,15 @@
 
 	len = le16_to_cpu(ctrl->wLength);
 	if (!len) {
-		dwc->ep0state = EP0_IN_WAIT_GADGET;
 		dwc->three_stage_setup = 0;
+		dwc->ep0_next_event = DWC3_EP0_NRDY_STATUS;
 	} else {
 		dwc->three_stage_setup = 1;
-		if (ctrl->bRequestType & USB_DIR_IN)
-			dwc->ep0state = EP0_IN_DATA_PHASE;
-		else
-			dwc->ep0state = EP0_OUT_DATA_PHASE;
+		dwc->ep0_next_event = DWC3_EP0_NRDY_DATA;
 	}
 
+	dwc->ep0_expect_in = !!(ctrl->bRequestType & USB_DIR_IN);
+
 	if ((ctrl->bRequestType & USB_TYPE_MASK) == USB_TYPE_STANDARD)
 		ret = dwc3_ep0_std_request(dwc, ctrl);
 	else
@@ -639,14 +566,16 @@
 	struct usb_request	*ur;
 	struct dwc3_trb		trb;
 	struct dwc3_ep		*dep;
-	u32			transfered;
+	u32			transferred;
 	u8			epnum;
 
 	epnum = event->endpoint_number;
 	dep = dwc->eps[epnum];
 
+	dwc->ep0_next_event = DWC3_EP0_NRDY_STATUS;
+
 	if (!dwc->ep0_status_pending) {
-		r = next_request(&dep->request_list);
+		r = next_request(&dwc->eps[0]->request_list);
 		ur = &r->request;
 	} else {
 		ur = &dwc->ep0_usb_req;
@@ -655,8 +584,17 @@
 
 	dwc3_trb_to_nat(dwc->ep0_trb, &trb);
 
-	transfered = ur->length - trb.length;
-	ur->actual += transfered;
+	if (dwc->ep0_bounced) {
+		struct dwc3_ep	*ep0 = dwc->eps[0];
+
+		transferred = min_t(u32, ur->length,
+				ep0->endpoint.maxpacket - trb.length);
+		memcpy(ur->buf, dwc->ep0_bounce, transferred);
+		dwc->ep0_bounced = false;
+	} else {
+		transferred = ur->length - trb.length;
+		ur->actual += transferred;
+	}
 
 	if ((epnum & 1) && ur->actual < ur->length) {
 		/* for some reason we did not get everything out */
@@ -668,12 +606,6 @@
 		 * handle the case where we have to send a zero packet. This
 		 * seems to be case when req.length > maxpacket. Could it be?
 		 */
-		/* The transfer is complete, wait for HOST */
-		if (epnum & 1)
-			dwc->ep0state = EP0_IN_WAIT_NRDY;
-		else
-			dwc->ep0state = EP0_OUT_WAIT_NRDY;
-
 		if (r)
 			dwc3_gadget_giveback(dep, r, 0);
 	}
@@ -684,10 +616,8 @@
 {
 	struct dwc3_request	*r;
 	struct dwc3_ep		*dep;
-	u8			epnum;
 
-	epnum = event->endpoint_number;
-	dep = dwc->eps[epnum];
+	dep = dwc->eps[0];
 
 	if (!list_empty(&dep->request_list)) {
 		r = next_request(&dep->request_list);
@@ -695,62 +625,170 @@
 		dwc3_gadget_giveback(dep, r, 0);
 	}
 
-	dwc->ep0state = EP0_IDLE;
+	dwc->ep0state = EP0_SETUP_PHASE;
 	dwc3_ep0_out_start(dwc);
 }
 
 static void dwc3_ep0_xfer_complete(struct dwc3 *dwc,
 			const struct dwc3_event_depevt *event)
 {
+	struct dwc3_ep		*dep = dwc->eps[event->endpoint_number];
+
+	dep->flags &= ~DWC3_EP_BUSY;
+
 	switch (dwc->ep0state) {
-	case EP0_IDLE:
+	case EP0_SETUP_PHASE:
+		dev_vdbg(dwc->dev, "Inspecting Setup Bytes\n");
 		dwc3_ep0_inspect_setup(dwc, event);
 		break;
 
-	case EP0_IN_DATA_PHASE:
-	case EP0_OUT_DATA_PHASE:
+	case EP0_DATA_PHASE:
+		dev_vdbg(dwc->dev, "Data Phase\n");
 		dwc3_ep0_complete_data(dwc, event);
 		break;
 
-	case EP0_IN_STATUS_PHASE:
-	case EP0_OUT_STATUS_PHASE:
+	case EP0_STATUS_PHASE:
+		dev_vdbg(dwc->dev, "Status Phase\n");
 		dwc3_ep0_complete_req(dwc, event);
 		break;
-
-	case EP0_IN_WAIT_NRDY:
-	case EP0_OUT_WAIT_NRDY:
-	case EP0_IN_WAIT_GADGET:
-	case EP0_OUT_WAIT_GADGET:
-	case EP0_UNCONNECTED:
-	case EP0_STALL:
-		break;
+	default:
+		WARN(true, "UNKNOWN ep0state %d\n", dwc->ep0state);
 	}
 }
 
+static void dwc3_ep0_do_control_setup(struct dwc3 *dwc,
+		const struct dwc3_event_depevt *event)
+{
+	dwc->ep0state = EP0_SETUP_PHASE;
+	dwc3_ep0_out_start(dwc);
+}
+
+static void dwc3_ep0_do_control_data(struct dwc3 *dwc,
+		const struct dwc3_event_depevt *event)
+{
+	struct dwc3_ep		*dep;
+	struct dwc3_request	*req;
+	int			ret;
+
+	dep = dwc->eps[0];
+	dwc->ep0state = EP0_DATA_PHASE;
+
+	if (dwc->ep0_status_pending) {
+		dwc3_ep0_send_status_response(dwc);
+		return;
+	}
+
+	if (list_empty(&dep->request_list)) {
+		dev_vdbg(dwc->dev, "pending request for EP0 Data phase\n");
+		dep->flags |= DWC3_EP_PENDING_REQUEST;
+
+		if (event->endpoint_number)
+			dep->flags |= DWC3_EP0_DIR_IN;
+		return;
+	}
+
+	req = next_request(&dep->request_list);
+	req->direction = !!event->endpoint_number;
+
+	dwc->ep0state = EP0_DATA_PHASE;
+	if (req->request.length == 0) {
+		ret = dwc3_ep0_start_trans(dwc, event->endpoint_number,
+				dwc->ctrl_req_addr, 0,
+				DWC3_TRBCTL_CONTROL_DATA);
+	} else if ((req->request.length % dep->endpoint.maxpacket)
+			&& (event->endpoint_number == 0)) {
+		dwc3_map_buffer_to_dma(req);
+
+		WARN_ON(req->request.length > dep->endpoint.maxpacket);
+
+		dwc->ep0_bounced = true;
+
+		/*
+		 * REVISIT in case request length is bigger than EP0
+		 * wMaxPacketSize, we will need two chained TRBs to handle
+		 * the transfer.
+		 */
+		ret = dwc3_ep0_start_trans(dwc, event->endpoint_number,
+				dwc->ep0_bounce_addr, dep->endpoint.maxpacket,
+				DWC3_TRBCTL_CONTROL_DATA);
+	} else {
+		dwc3_map_buffer_to_dma(req);
+
+		ret = dwc3_ep0_start_trans(dwc, event->endpoint_number,
+				req->request.dma, req->request.length,
+				DWC3_TRBCTL_CONTROL_DATA);
+	}
+
+	WARN_ON(ret < 0);
+}
+
+static void dwc3_ep0_do_control_status(struct dwc3 *dwc,
+		const struct dwc3_event_depevt *event)
+{
+	u32			type;
+	int			ret;
+
+	dwc->ep0state = EP0_STATUS_PHASE;
+
+	type = dwc->three_stage_setup ? DWC3_TRBCTL_CONTROL_STATUS3
+		: DWC3_TRBCTL_CONTROL_STATUS2;
+
+	ret = dwc3_ep0_start_trans(dwc, event->endpoint_number,
+			dwc->ctrl_req_addr, 0, type);
+
+	WARN_ON(ret < 0);
+}
+
 static void dwc3_ep0_xfernotready(struct dwc3 *dwc,
 		const struct dwc3_event_depevt *event)
 {
-	switch (dwc->ep0state) {
-	case EP0_IN_WAIT_GADGET:
-		dwc->ep0state = EP0_IN_WAIT_NRDY;
-		break;
-	case EP0_OUT_WAIT_GADGET:
-		dwc->ep0state = EP0_OUT_WAIT_NRDY;
+	switch (event->status) {
+	case DEPEVT_STATUS_CONTROL_SETUP:
+		dev_vdbg(dwc->dev, "Control Setup\n");
+		dwc3_ep0_do_control_setup(dwc, event);
 		break;
 
-	case EP0_IN_WAIT_NRDY:
-	case EP0_OUT_WAIT_NRDY:
-		dwc3_ep0_do_setup_status(dwc, event);
+	case DEPEVT_STATUS_CONTROL_DATA:
+		dev_vdbg(dwc->dev, "Control Data\n");
+
+		if (dwc->ep0_next_event != DWC3_EP0_NRDY_DATA) {
+			dev_vdbg(dwc->dev, "Expected %d got %d\n",
+					DEPEVT_STATUS_CONTROL_DATA,
+					event->status);
+
+			dwc3_ep0_stall_and_restart(dwc);
+			return;
+		}
+
+		/*
+		 * One of the possible error cases is when Host _does_
+		 * request for Data Phase, but it does so on the wrong
+		 * direction.
+		 *
+		 * Here, we already know ep0_next_event is DATA (see above),
+		 * so we only need to check for direction.
+		 */
+		if (dwc->ep0_expect_in != event->endpoint_number) {
+			dev_vdbg(dwc->dev, "Wrong direction for Data phase\n");
+			dwc3_ep0_stall_and_restart(dwc);
+			return;
+		}
+
+		dwc3_ep0_do_control_data(dwc, event);
 		break;
 
-	case EP0_IDLE:
-	case EP0_IN_STATUS_PHASE:
-	case EP0_OUT_STATUS_PHASE:
-	case EP0_IN_DATA_PHASE:
-	case EP0_OUT_DATA_PHASE:
-	case EP0_UNCONNECTED:
-	case EP0_STALL:
-		break;
+	case DEPEVT_STATUS_CONTROL_STATUS:
+		dev_vdbg(dwc->dev, "Control Status\n");
+
+		if (dwc->ep0_next_event != DWC3_EP0_NRDY_STATUS) {
+			dev_vdbg(dwc->dev, "Expected %d got %d\n",
+					DEPEVT_STATUS_CONTROL_STATUS,
+					event->status);
+
+			dwc3_ep0_stall_and_restart(dwc);
+			return;
+		}
+		dwc3_ep0_do_control_status(dwc, event);
 	}
 }
 
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index cebaef7..859257a 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -61,16 +61,16 @@
 {
 	struct dwc3			*dwc = req->dep->dwc;
 
+	if (req->request.length == 0) {
+		/* req->request.dma = dwc->setup_buf_addr; */
+		return;
+	}
+
 	if (req->request.dma == DMA_ADDR_INVALID) {
 		req->request.dma = dma_map_single(dwc->dev, req->request.buf,
 				req->request.length, req->direction
 				? DMA_TO_DEVICE : DMA_FROM_DEVICE);
 		req->mapped = true;
-	} else {
-		dma_sync_single_for_device(dwc->dev, req->request.dma,
-				req->request.length, req->direction
-				? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-		req->mapped = false;
 	}
 }
 
@@ -78,15 +78,17 @@
 {
 	struct dwc3			*dwc = req->dep->dwc;
 
+	if (req->request.length == 0) {
+		req->request.dma = DMA_ADDR_INVALID;
+		return;
+	}
+
 	if (req->mapped) {
 		dma_unmap_single(dwc->dev, req->request.dma,
 				req->request.length, req->direction
 				? DMA_TO_DEVICE : DMA_FROM_DEVICE);
 		req->mapped = 0;
-	} else {
-		dma_sync_single_for_cpu(dwc->dev, req->request.dma,
-				req->request.length, req->direction
-				? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+		req->request.dma = DMA_ADDR_INVALID;
 	}
 }
 
@@ -152,7 +154,7 @@
 		unsigned cmd, struct dwc3_gadget_ep_cmd_params *params)
 {
 	struct dwc3_ep		*dep = dwc->eps[ep];
-	unsigned long		timeout = 500;
+	u32			timeout = 500;
 	u32			reg;
 
 	dev_vdbg(dwc->dev, "%s: cmd '%s' params %08x %08x %08x\n",
@@ -168,13 +170,12 @@
 	do {
 		reg = dwc3_readl(dwc->regs, DWC3_DEPCMD(ep));
 		if (!(reg & DWC3_DEPCMD_CMDACT)) {
-			dev_vdbg(dwc->dev, "CMD Compl Status %d DEPCMD %04x\n",
-					((reg & 0xf000) >> 12), reg);
+			dev_vdbg(dwc->dev, "Command Complete --> %d\n",
+					DWC3_DEPCMD_STATUS(reg));
 			return 0;
 		}
 
 		/*
-		 * XXX Figure out a sane timeout here. 500ms is way too much.
 		 * We can't sleep here, because it is also called from
 		 * interrupt context.
 		 */
@@ -182,7 +183,7 @@
 		if (!timeout)
 			return -ETIMEDOUT;
 
-		mdelay(1);
+		udelay(1);
 	} while (1);
 }
 
@@ -358,34 +359,36 @@
 	return 0;
 }
 
-static void dwc3_gadget_nuke_reqs(struct dwc3_ep *dep, const int status)
+static void dwc3_stop_active_transfer(struct dwc3 *dwc, u32 epnum);
+static void dwc3_remove_requests(struct dwc3 *dwc, struct dwc3_ep *dep)
 {
 	struct dwc3_request		*req;
 
+	if (!list_empty(&dep->req_queued))
+		dwc3_stop_active_transfer(dwc, dep->number);
+
 	while (!list_empty(&dep->request_list)) {
 		req = next_request(&dep->request_list);
 
-		dwc3_gadget_giveback(dep, req, status);
+		dwc3_gadget_giveback(dep, req, -ESHUTDOWN);
 	}
-	/* nuke queued TRBs as well on command complete */
-	dep->flags |= DWC3_EP_WILL_SHUTDOWN;
 }
 
 /**
  * __dwc3_gadget_ep_disable - Disables a HW endpoint
  * @dep: the endpoint to disable
  *
- * Caller should take care of locking
+ * This function also removes requests which are currently processed ny the
+ * hardware and those which are not yet scheduled.
+ * Caller should take care of locking.
  */
-static void dwc3_stop_active_transfer(struct dwc3 *dwc, u32 epnum);
 static int __dwc3_gadget_ep_disable(struct dwc3_ep *dep)
 {
 	struct dwc3		*dwc = dep->dwc;
 	u32			reg;
 
 	dep->flags &= ~DWC3_EP_ENABLED;
-	dwc3_stop_active_transfer(dwc, dep->number);
-	dwc3_gadget_nuke_reqs(dep, -ESHUTDOWN);
+	dwc3_remove_requests(dwc, dep);
 
 	reg = dwc3_readl(dwc->regs, DWC3_DALEPENA);
 	reg &= ~DWC3_DALEPENA_EP(dep->number);
@@ -632,7 +635,7 @@
 			break;
 
 		case USB_ENDPOINT_XFER_ISOC:
-			trb.trbctl = DWC3_TRBCTL_ISOCHRONOUS;
+			trb.trbctl = DWC3_TRBCTL_ISOCHRONOUS_FIRST;
 
 			/* IOC every DWC3_TRB_NUM / 4 so we can refill */
 			if (!(cur_slot % (DWC3_TRB_NUM / 4)))
@@ -870,8 +873,14 @@
 	memset(&params, 0x00, sizeof(params));
 
 	if (value) {
-		if (dep->number == 0 || dep->number == 1)
-			dwc->ep0state = EP0_STALL;
+		if (dep->number == 0 || dep->number == 1) {
+			/*
+			 * Whenever EP0 is stalled, we will restart
+			 * the state machine, thus moving back to
+			 * Setup Phase
+			 */
+			dwc->ep0state = EP0_SETUP_PHASE;
+		}
 
 		ret = dwc3_send_gadget_ep_cmd(dwc, dep->number,
 			DWC3_DEPCMD_SETSTALL, &params);
@@ -1063,7 +1072,7 @@
 static void dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on)
 {
 	u32			reg;
-	unsigned long		timeout = 500;
+	u32			timeout = 500;
 
 	reg = dwc3_readl(dwc->regs, DWC3_DCTL);
 	if (is_on)
@@ -1082,13 +1091,10 @@
 			if (reg & DWC3_DSTS_DEVCTRLHLT)
 				break;
 		}
-		/*
-		 * XXX reduce the 500ms delay
-		 */
 		timeout--;
 		if (!timeout)
 			break;
-		mdelay(1);
+		udelay(1);
 	} while (1);
 
 	dev_vdbg(dwc->dev, "gadget %s data soft-%s\n",
@@ -1135,13 +1141,10 @@
 
 	reg = dwc3_readl(dwc->regs, DWC3_GCTL);
 
-	/*
-	 * REVISIT: power down scale might be different
-	 * depending on PHY used, need to pass that via platform_data
-	 */
-	reg |= DWC3_GCTL_PWRDNSCALE(0x61a)
-		| DWC3_GCTL_PRTCAPDIR(DWC3_GCTL_PRTCAP_DEVICE);
+	reg &= ~DWC3_GCTL_SCALEDOWN(3);
+	reg &= ~DWC3_GCTL_PRTCAPDIR(DWC3_GCTL_PRTCAP_OTG);
 	reg &= ~DWC3_GCTL_DISSCRAMBLE;
+	reg |= DWC3_GCTL_PRTCAPDIR(DWC3_GCTL_PRTCAP_DEVICE);
 
 	/*
 	 * WORKAROUND: DWC3 revisions <1.90a have a bug
@@ -1177,7 +1180,7 @@
 	}
 
 	/* begin to receive SETUP packets */
-	dwc->ep0state = EP0_IDLE;
+	dwc->ep0state = EP0_SETUP_PHASE;
 	dwc3_ep0_out_start(dwc);
 
 	spin_unlock_irqrestore(&dwc->lock, flags);
@@ -1309,11 +1312,17 @@
 
 		dwc3_trb_to_nat(req->trb, &trb);
 
-		if (trb.hwo) {
+		if (trb.hwo && status != -ESHUTDOWN)
+			/*
+			 * We continue despite the error. There is not much we
+			 * can do. If we don't clean in up we loop for ever. If
+			 * we skip the TRB than it gets overwritten reused after
+			 * a while since we use them in a ring buffer. a BUG()
+			 * would help. Lets hope that if this occures, someone
+			 * fixes the root cause instead of looking away :)
+			 */
 			dev_err(dwc->dev, "%s's TRB (%p) still owned by HW\n",
 					dep->name, req->trb);
-			continue;
-		}
 		count = trb.length;
 
 		if (dep->direction) {
@@ -1360,8 +1369,10 @@
 		status = -ECONNRESET;
 
 	clean_busy =  dwc3_cleanup_done_reqs(dwc, dep, event, status);
-	if (clean_busy)
+	if (clean_busy) {
 		dep->flags &= ~DWC3_EP_BUSY;
+		dep->res_trans_idx = 0;
+	}
 }
 
 static void dwc3_gadget_start_isoc(struct dwc3 *dwc,
@@ -1407,16 +1418,6 @@
 	dwc3_cleanup_done_reqs(dwc, dep, &mod_ev, -ESHUTDOWN);
 	dep->flags &= ~DWC3_EP_BUSY;
 	/* pending requets are ignored and are queued on XferNotReady */
-
-	if (dep->flags & DWC3_EP_WILL_SHUTDOWN) {
-		while (!list_empty(&dep->req_queued)) {
-			struct dwc3_request	*req;
-
-			req = next_request(&dep->req_queued);
-			dwc3_gadget_giveback(dep, req, -ESHUTDOWN);
-		}
-		dep->flags &= DWC3_EP_WILL_SHUTDOWN;
-	}
 }
 
 static void dwc3_ep_cmd_compl(struct dwc3_ep *dep,
@@ -1524,6 +1525,7 @@
 
 	dep = dwc->eps[epnum];
 
+	WARN_ON(!dep->res_trans_idx);
 	if (dep->res_trans_idx) {
 		cmd = DWC3_DEPCMD_ENDTRANSFER;
 		cmd |= DWC3_DEPCMD_HIPRI_FORCERM | DWC3_DEPCMD_CMDIOC;
@@ -1531,6 +1533,7 @@
 		memset(&params, 0, sizeof(params));
 		ret = dwc3_send_gadget_ep_cmd(dwc, dep->number, cmd, &params);
 		WARN_ON_ONCE(ret);
+		dep->res_trans_idx = 0;
 	}
 }
 
@@ -1545,7 +1548,7 @@
 		if (!(dep->flags & DWC3_EP_ENABLED))
 			continue;
 
-		__dwc3_gadget_ep_disable(dep);
+		dwc3_remove_requests(dwc, dep);
 	}
 }
 
@@ -1718,7 +1721,6 @@
 
 	memset(&params, 0x00, sizeof(params));
 
-	dwc->ep0state = EP0_IDLE;
 	reg = dwc3_readl(dwc->regs, DWC3_DSTS);
 	speed = reg & DWC3_DSTS_CONNECTSPD;
 	dwc->speed = speed;
@@ -1790,10 +1792,10 @@
 static void dwc3_gadget_linksts_change_interrupt(struct dwc3 *dwc,
 		unsigned int evtinfo)
 {
-	dev_vdbg(dwc->dev, "%s\n", __func__);
-
 	/*  The fith bit says SuperSpeed yes or no. */
 	dwc->link_state = evtinfo & DWC3_LINK_STATE_MASK;
+
+	dev_vdbg(dwc->dev, "%s link %d\n", __func__, dwc->link_state);
 }
 
 static void dwc3_gadget_interrupt(struct dwc3 *dwc,
@@ -1947,6 +1949,14 @@
 		goto err2;
 	}
 
+	dwc->ep0_bounce = dma_alloc_coherent(dwc->dev,
+			512, &dwc->ep0_bounce_addr, GFP_KERNEL);
+	if (!dwc->ep0_bounce) {
+		dev_err(dwc->dev, "failed to allocate ep0 bounce buffer\n");
+		ret = -ENOMEM;
+		goto err3;
+	}
+
 	dev_set_name(&dwc->gadget.dev, "gadget");
 
 	dwc->gadget.ops			= &dwc3_gadget_ops;
@@ -1968,7 +1978,7 @@
 
 	ret = dwc3_gadget_init_endpoints(dwc);
 	if (ret)
-		goto err3;
+		goto err4;
 
 	irq = platform_get_irq(to_platform_device(dwc->dev), 0);
 
@@ -1977,7 +1987,7 @@
 	if (ret) {
 		dev_err(dwc->dev, "failed to request irq #%d --> %d\n",
 				irq, ret);
-		goto err4;
+		goto err5;
 	}
 
 	/* Enable all but Start and End of Frame IRQs */
@@ -1996,27 +2006,31 @@
 	if (ret) {
 		dev_err(dwc->dev, "failed to register gadget device\n");
 		put_device(&dwc->gadget.dev);
-		goto err5;
+		goto err6;
 	}
 
 	ret = usb_add_gadget_udc(dwc->dev, &dwc->gadget);
 	if (ret) {
 		dev_err(dwc->dev, "failed to register udc\n");
-		goto err6;
+		goto err7;
 	}
 
 	return 0;
 
-err6:
+err7:
 	device_unregister(&dwc->gadget.dev);
 
-err5:
+err6:
 	dwc3_writel(dwc->regs, DWC3_DEVTEN, 0x00);
 	free_irq(irq, dwc);
 
-err4:
+err5:
 	dwc3_gadget_free_endpoints(dwc);
 
+err4:
+	dma_free_coherent(dwc->dev, 512, dwc->ep0_bounce,
+			dwc->ep0_bounce_addr);
+
 err3:
 	dma_free_coherent(dwc->dev, sizeof(*dwc->setup_buf) * 2,
 			dwc->setup_buf, dwc->setup_buf_addr);
@@ -2049,6 +2063,9 @@
 
 	dwc3_gadget_free_endpoints(dwc);
 
+	dma_free_coherent(dwc->dev, 512, dwc->ep0_bounce,
+			dwc->ep0_bounce_addr);
+
 	dma_free_coherent(dwc->dev, sizeof(*dwc->setup_buf) * 2,
 			dwc->setup_buf, dwc->setup_buf_addr);
 
diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig
index fe56379..a60b472 100644
--- a/drivers/usb/gadget/Kconfig
+++ b/drivers/usb/gadget/Kconfig
@@ -96,6 +96,22 @@
 	   This value will be used except for system-specific gadget
 	   drivers that have more specific information.
 
+config USB_GADGET_STORAGE_NUM_BUFFERS
+	int "Number of storage pipeline buffers"
+	range 2 4
+	default 2
+	help
+	   Usually 2 buffers are enough to establish a good buffering
+	   pipeline. The number may be increased in order to compensate
+	   for a bursty VFS behaviour. For instance there may be CPU wake up
+	   latencies that makes the VFS to appear bursty in a system with
+	   an CPU on-demand governor. Especially if DMA is doing IO to
+	   offload the CPU. In this case the CPU will go into power
+	   save often and spin up occasionally to move data within VFS.
+	   If selecting USB_GADGET_DEBUG_FILES this value may be set by
+	   a module parameter as well.
+	   If unsure, say 2.
+
 #
 # USB Peripheral Controller Support
 #
diff --git a/drivers/usb/gadget/f_audio.c b/drivers/usb/gadget/f_audio.c
index a9a4ead..ec7ffcd 100644
--- a/drivers/usb/gadget/f_audio.c
+++ b/drivers/usb/gadget/f_audio.c
@@ -460,7 +460,7 @@
 
 	switch (ctrl->bRequest) {
 	case UAC_SET_CUR:
-		value = 0;
+		value = len;
 		break;
 
 	case UAC_SET_MIN:
@@ -499,7 +499,7 @@
 	case UAC_GET_MIN:
 	case UAC_GET_MAX:
 	case UAC_GET_RES:
-		value = 3;
+		value = len;
 		break;
 	case UAC_GET_MEM:
 		break;
@@ -681,17 +681,18 @@
 
 	status = -ENOMEM;
 
-	/* supcard all relevant hardware speeds... we expect that when
+	/* copy descriptors, and track endpoint copies */
+	f->descriptors = usb_copy_descriptors(f_audio_desc);
+
+	/*
+	 * support all relevant hardware speeds... we expect that when
 	 * hardware is dual speed, all bulk-capable endpoints work at
 	 * both speeds
 	 */
-
-	/* copy descriptors, and track endpoint copies */
 	if (gadget_is_dualspeed(c->cdev->gadget)) {
 		c->highspeed = true;
 		f->hs_descriptors = usb_copy_descriptors(f_audio_desc);
-	} else
-		f->descriptors = usb_copy_descriptors(f_audio_desc);
+	}
 
 	return 0;
 
diff --git a/drivers/usb/gadget/f_mass_storage.c b/drivers/usb/gadget/f_mass_storage.c
index 4ce3dec..7569414 100644
--- a/drivers/usb/gadget/f_mass_storage.c
+++ b/drivers/usb/gadget/f_mass_storage.c
@@ -112,8 +112,7 @@
  * is not loaded (an empty string as "filename" in the fsg_config
  * structure causes error).  The CD-ROM emulation includes a single
  * data track and no audio tracks; hence there need be only one
- * backing file per LUN.  Note also that the CD-ROM block length is
- * set to 512 rather than the more common value 2048.
+ * backing file per LUN.
  *
  *
  * MSF includes support for module parameters.  If gadget using it
@@ -363,7 +362,7 @@
 
 	struct fsg_buffhd	*next_buffhd_to_fill;
 	struct fsg_buffhd	*next_buffhd_to_drain;
-	struct fsg_buffhd	buffhds[FSG_NUM_BUFFERS];
+	struct fsg_buffhd	*buffhds;
 
 	int			cmnd_size;
 	u8			cmnd[MAX_COMMAND_SIZE];
@@ -745,7 +744,6 @@
 	u32			amount_left;
 	loff_t			file_offset, file_offset_tmp;
 	unsigned int		amount;
-	unsigned int		partial_page;
 	ssize_t			nread;
 
 	/*
@@ -771,7 +769,7 @@
 		curlun->sense_data = SS_LOGICAL_BLOCK_ADDRESS_OUT_OF_RANGE;
 		return -EINVAL;
 	}
-	file_offset = ((loff_t) lba) << 9;
+	file_offset = ((loff_t) lba) << curlun->blkbits;
 
 	/* Carry out the file reads */
 	amount_left = common->data_size_from_cmnd;
@@ -784,18 +782,10 @@
 		 * Try to read the remaining amount.
 		 * But don't read more than the buffer size.
 		 * And don't try to read past the end of the file.
-		 * Finally, if we're not at a page boundary, don't read past
-		 *	the next page.
-		 * If this means reading 0 then we were asked to read past
-		 *	the end of file.
 		 */
 		amount = min(amount_left, FSG_BUFLEN);
 		amount = min((loff_t)amount,
 			     curlun->file_length - file_offset);
-		partial_page = file_offset & (PAGE_CACHE_SIZE - 1);
-		if (partial_page > 0)
-			amount = min(amount, (unsigned int)PAGE_CACHE_SIZE -
-					     partial_page);
 
 		/* Wait for the next buffer to become available */
 		bh = common->next_buffhd_to_fill;
@@ -812,7 +802,8 @@
 		if (amount == 0) {
 			curlun->sense_data =
 					SS_LOGICAL_BLOCK_ADDRESS_OUT_OF_RANGE;
-			curlun->sense_data_info = file_offset >> 9;
+			curlun->sense_data_info =
+					file_offset >> curlun->blkbits;
 			curlun->info_valid = 1;
 			bh->inreq->length = 0;
 			bh->state = BUF_STATE_FULL;
@@ -835,18 +826,25 @@
 		} else if (nread < amount) {
 			LDBG(curlun, "partial file read: %d/%u\n",
 			     (int)nread, amount);
-			nread -= (nread & 511);	/* Round down to a block */
+			nread = round_down(nread, curlun->blksize);
 		}
 		file_offset  += nread;
 		amount_left  -= nread;
 		common->residue -= nread;
+
+		/*
+		 * Except at the end of the transfer, nread will be
+		 * equal to the buffer size, which is divisible by the
+		 * bulk-in maxpacket size.
+		 */
 		bh->inreq->length = nread;
 		bh->state = BUF_STATE_FULL;
 
 		/* If an error occurred, report it and its position */
 		if (nread < amount) {
 			curlun->sense_data = SS_UNRECOVERED_READ_ERROR;
-			curlun->sense_data_info = file_offset >> 9;
+			curlun->sense_data_info =
+					file_offset >> curlun->blkbits;
 			curlun->info_valid = 1;
 			break;
 		}
@@ -877,7 +875,6 @@
 	u32			amount_left_to_req, amount_left_to_write;
 	loff_t			usb_offset, file_offset, file_offset_tmp;
 	unsigned int		amount;
-	unsigned int		partial_page;
 	ssize_t			nwritten;
 	int			rc;
 
@@ -921,7 +918,7 @@
 
 	/* Carry out the file writes */
 	get_some_more = 1;
-	file_offset = usb_offset = ((loff_t) lba) << 9;
+	file_offset = usb_offset = ((loff_t) lba) << curlun->blkbits;
 	amount_left_to_req = common->data_size_from_cmnd;
 	amount_left_to_write = common->data_size_from_cmnd;
 
@@ -933,41 +930,21 @@
 
 			/*
 			 * Figure out how much we want to get:
-			 * Try to get the remaining amount.
-			 * But don't get more than the buffer size.
-			 * And don't try to go past the end of the file.
-			 * If we're not at a page boundary,
-			 *	don't go past the next page.
-			 * If this means getting 0, then we were asked
-			 *	to write past the end of file.
-			 * Finally, round down to a block boundary.
+			 * Try to get the remaining amount,
+			 * but not more than the buffer size.
 			 */
 			amount = min(amount_left_to_req, FSG_BUFLEN);
-			amount = min((loff_t)amount,
-				     curlun->file_length - usb_offset);
-			partial_page = usb_offset & (PAGE_CACHE_SIZE - 1);
-			if (partial_page > 0)
-				amount = min(amount,
-	(unsigned int)PAGE_CACHE_SIZE - partial_page);
 
-			if (amount == 0) {
+			/* Beyond the end of the backing file? */
+			if (usb_offset >= curlun->file_length) {
 				get_some_more = 0;
 				curlun->sense_data =
 					SS_LOGICAL_BLOCK_ADDRESS_OUT_OF_RANGE;
-				curlun->sense_data_info = usb_offset >> 9;
+				curlun->sense_data_info =
+					usb_offset >> curlun->blkbits;
 				curlun->info_valid = 1;
 				continue;
 			}
-			amount -= amount & 511;
-			if (amount == 0) {
-
-				/*
-				 * Why were we were asked to transfer a
-				 * partial block?
-				 */
-				get_some_more = 0;
-				continue;
-			}
 
 			/* Get the next buffer */
 			usb_offset += amount;
@@ -977,8 +954,9 @@
 				get_some_more = 0;
 
 			/*
-			 * amount is always divisible by 512, hence by
-			 * the bulk-out maxpacket size
+			 * Except at the end of the transfer, amount will be
+			 * equal to the buffer size, which is divisible by
+			 * the bulk-out maxpacket size.
 			 */
 			bh->outreq->length = amount;
 			bh->bulk_out_intended_length = amount;
@@ -1002,7 +980,8 @@
 			/* Did something go wrong with the transfer? */
 			if (bh->outreq->status != 0) {
 				curlun->sense_data = SS_COMMUNICATION_FAILURE;
-				curlun->sense_data_info = file_offset >> 9;
+				curlun->sense_data_info =
+					file_offset >> curlun->blkbits;
 				curlun->info_valid = 1;
 				break;
 			}
@@ -1016,6 +995,11 @@
 				amount = curlun->file_length - file_offset;
 			}
 
+			/* Don't write a partial block */
+			amount = round_down(amount, curlun->blksize);
+			if (amount == 0)
+				goto empty_write;
+
 			/* Perform the write */
 			file_offset_tmp = file_offset;
 			nwritten = vfs_write(curlun->filp,
@@ -1033,8 +1017,7 @@
 			} else if (nwritten < amount) {
 				LDBG(curlun, "partial file write: %d/%u\n",
 				     (int)nwritten, amount);
-				nwritten -= (nwritten & 511);
-				/* Round down to a block */
+				nwritten = round_down(nwritten, curlun->blksize);
 			}
 			file_offset += nwritten;
 			amount_left_to_write -= nwritten;
@@ -1043,11 +1026,13 @@
 			/* If an error occurred, report it and its position */
 			if (nwritten < amount) {
 				curlun->sense_data = SS_WRITE_ERROR;
-				curlun->sense_data_info = file_offset >> 9;
+				curlun->sense_data_info =
+					file_offset >> curlun->blkbits;
 				curlun->info_valid = 1;
 				break;
 			}
 
+ empty_write:
 			/* Did the host decide to stop early? */
 			if (bh->outreq->actual != bh->outreq->length) {
 				common->short_packet_received = 1;
@@ -1129,8 +1114,8 @@
 		return -EIO;		/* No default reply */
 
 	/* Prepare to carry out the file verify */
-	amount_left = verification_length << 9;
-	file_offset = ((loff_t) lba) << 9;
+	amount_left = verification_length << curlun->blkbits;
+	file_offset = ((loff_t) lba) << curlun->blkbits;
 
 	/* Write out all the dirty buffers before invalidating them */
 	fsg_lun_fsync_sub(curlun);
@@ -1148,8 +1133,6 @@
 		 * Try to read the remaining amount, but not more than
 		 * the buffer size.
 		 * And don't try to read past the end of the file.
-		 * If this means reading 0 then we were asked to read
-		 * past the end of file.
 		 */
 		amount = min(amount_left, FSG_BUFLEN);
 		amount = min((loff_t)amount,
@@ -1157,7 +1140,8 @@
 		if (amount == 0) {
 			curlun->sense_data =
 					SS_LOGICAL_BLOCK_ADDRESS_OUT_OF_RANGE;
-			curlun->sense_data_info = file_offset >> 9;
+			curlun->sense_data_info =
+				file_offset >> curlun->blkbits;
 			curlun->info_valid = 1;
 			break;
 		}
@@ -1179,11 +1163,12 @@
 		} else if (nread < amount) {
 			LDBG(curlun, "partial file verify: %d/%u\n",
 			     (int)nread, amount);
-			nread -= nread & 511;	/* Round down to a sector */
+			nread = round_down(nread, curlun->blksize);
 		}
 		if (nread == 0) {
 			curlun->sense_data = SS_UNRECOVERED_READ_ERROR;
-			curlun->sense_data_info = file_offset >> 9;
+			curlun->sense_data_info =
+				file_offset >> curlun->blkbits;
 			curlun->info_valid = 1;
 			break;
 		}
@@ -1289,7 +1274,7 @@
 
 	put_unaligned_be32(curlun->num_sectors - 1, &buf[0]);
 						/* Max logical block */
-	put_unaligned_be32(512, &buf[4]);	/* Block length */
+	put_unaligned_be32(curlun->blksize, &buf[4]);/* Block length */
 	return 8;
 }
 
@@ -1527,7 +1512,7 @@
 
 	put_unaligned_be32(curlun->num_sectors, &buf[0]);
 						/* Number of blocks */
-	put_unaligned_be32(512, &buf[4]);	/* Block length */
+	put_unaligned_be32(curlun->blksize, &buf[4]);/* Block length */
 	buf[4] = 0x02;				/* Current capacity */
 	return 12;
 }
@@ -1623,7 +1608,8 @@
 			amount = min(common->usb_amount_left, FSG_BUFLEN);
 
 			/*
-			 * amount is always divisible by 512, hence by
+			 * Except at the end of the transfer, amount will be
+			 * equal to the buffer size, which is divisible by
 			 * the bulk-out maxpacket size.
 			 */
 			bh->outreq->length = amount;
@@ -2022,7 +2008,8 @@
 
 	case READ_6:
 		i = common->cmnd[4];
-		common->data_size_from_cmnd = (i == 0 ? 256 : i) << 9;
+		common->data_size_from_cmnd = (i == 0 ? 256 : i) <<
+				common->curlun->blkbits;
 		reply = check_command(common, 6, DATA_DIR_TO_HOST,
 				      (7<<1) | (1<<4), 1,
 				      "READ(6)");
@@ -2032,7 +2019,8 @@
 
 	case READ_10:
 		common->data_size_from_cmnd =
-				get_unaligned_be16(&common->cmnd[7]) << 9;
+				get_unaligned_be16(&common->cmnd[7]) <<
+						common->curlun->blkbits;
 		reply = check_command(common, 10, DATA_DIR_TO_HOST,
 				      (1<<1) | (0xf<<2) | (3<<7), 1,
 				      "READ(10)");
@@ -2042,7 +2030,8 @@
 
 	case READ_12:
 		common->data_size_from_cmnd =
-				get_unaligned_be32(&common->cmnd[6]) << 9;
+				get_unaligned_be32(&common->cmnd[6]) <<
+						common->curlun->blkbits;
 		reply = check_command(common, 12, DATA_DIR_TO_HOST,
 				      (1<<1) | (0xf<<2) | (0xf<<6), 1,
 				      "READ(12)");
@@ -2142,7 +2131,8 @@
 
 	case WRITE_6:
 		i = common->cmnd[4];
-		common->data_size_from_cmnd = (i == 0 ? 256 : i) << 9;
+		common->data_size_from_cmnd = (i == 0 ? 256 : i) <<
+					common->curlun->blkbits;
 		reply = check_command(common, 6, DATA_DIR_FROM_HOST,
 				      (7<<1) | (1<<4), 1,
 				      "WRITE(6)");
@@ -2152,7 +2142,8 @@
 
 	case WRITE_10:
 		common->data_size_from_cmnd =
-				get_unaligned_be16(&common->cmnd[7]) << 9;
+				get_unaligned_be16(&common->cmnd[7]) <<
+						common->curlun->blkbits;
 		reply = check_command(common, 10, DATA_DIR_FROM_HOST,
 				      (1<<1) | (0xf<<2) | (3<<7), 1,
 				      "WRITE(10)");
@@ -2162,7 +2153,8 @@
 
 	case WRITE_12:
 		common->data_size_from_cmnd =
-				get_unaligned_be32(&common->cmnd[6]) << 9;
+				get_unaligned_be32(&common->cmnd[6]) <<
+						common->curlun->blkbits;
 		reply = check_command(common, 12, DATA_DIR_FROM_HOST,
 				      (1<<1) | (0xf<<2) | (0xf<<6), 1,
 				      "WRITE(12)");
@@ -2348,7 +2340,7 @@
 	if (common->fsg) {
 		fsg = common->fsg;
 
-		for (i = 0; i < FSG_NUM_BUFFERS; ++i) {
+		for (i = 0; i < fsg_num_buffers; ++i) {
 			struct fsg_buffhd *bh = &common->buffhds[i];
 
 			if (bh->inreq) {
@@ -2405,7 +2397,7 @@
 	clear_bit(IGNORE_BULK_OUT, &fsg->atomic_bitflags);
 
 	/* Allocate the requests */
-	for (i = 0; i < FSG_NUM_BUFFERS; ++i) {
+	for (i = 0; i < fsg_num_buffers; ++i) {
 		struct fsg_buffhd	*bh = &common->buffhds[i];
 
 		rc = alloc_request(common, fsg->bulk_in, &bh->inreq);
@@ -2474,7 +2466,7 @@
 
 	/* Cancel all the pending transfers */
 	if (likely(common->fsg)) {
-		for (i = 0; i < FSG_NUM_BUFFERS; ++i) {
+		for (i = 0; i < fsg_num_buffers; ++i) {
 			bh = &common->buffhds[i];
 			if (bh->inreq_busy)
 				usb_ep_dequeue(common->fsg->bulk_in, bh->inreq);
@@ -2486,7 +2478,7 @@
 		/* Wait until everything is idle */
 		for (;;) {
 			int num_active = 0;
-			for (i = 0; i < FSG_NUM_BUFFERS; ++i) {
+			for (i = 0; i < fsg_num_buffers; ++i) {
 				bh = &common->buffhds[i];
 				num_active += bh->inreq_busy + bh->outreq_busy;
 			}
@@ -2509,7 +2501,7 @@
 	 */
 	spin_lock_irq(&common->lock);
 
-	for (i = 0; i < FSG_NUM_BUFFERS; ++i) {
+	for (i = 0; i < fsg_num_buffers; ++i) {
 		bh = &common->buffhds[i];
 		bh->state = BUF_STATE_EMPTY;
 	}
@@ -2718,6 +2710,10 @@
 	int nluns, i, rc;
 	char *pathbuf;
 
+	rc = fsg_num_buffers_validate();
+	if (rc != 0)
+		return ERR_PTR(rc);
+
 	/* Find out how many LUNs there should be */
 	nluns = cfg->nluns;
 	if (nluns < 1 || nluns > FSG_MAX_LUNS) {
@@ -2736,6 +2732,14 @@
 		common->free_storage_on_release = 0;
 	}
 
+	common->buffhds = kcalloc(fsg_num_buffers,
+				  sizeof *(common->buffhds), GFP_KERNEL);
+	if (!common->buffhds) {
+		if (common->free_storage_on_release)
+			kfree(common);
+		return ERR_PTR(-ENOMEM);
+	}
+
 	common->ops = cfg->ops;
 	common->private_data = cfg->private_data;
 
@@ -2813,7 +2817,7 @@
 
 	/* Data buffers cyclic list */
 	bh = common->buffhds;
-	i = FSG_NUM_BUFFERS;
+	i = fsg_num_buffers;
 	goto buffhds_first_it;
 	do {
 		bh->next = bh + 1;
@@ -2939,12 +2943,13 @@
 
 	{
 		struct fsg_buffhd *bh = common->buffhds;
-		unsigned i = FSG_NUM_BUFFERS;
+		unsigned i = fsg_num_buffers;
 		do {
 			kfree(bh->buf);
 		} while (++bh, --i);
 	}
 
+	kfree(common->buffhds);
 	if (common->free_storage_on_release)
 		kfree(common);
 }
diff --git a/drivers/usb/gadget/file_storage.c b/drivers/usb/gadget/file_storage.c
index 4ac8084..12ac30b 100644
--- a/drivers/usb/gadget/file_storage.c
+++ b/drivers/usb/gadget/file_storage.c
@@ -69,8 +69,7 @@
  * each LUN would be settable independently as a disk drive or a CD-ROM
  * drive, but currently all LUNs have to be the same type.  The CD-ROM
  * emulation includes a single data track and no audio tracks; hence there
- * need be only one backing file per LUN.  Note also that the CD-ROM block
- * length is set to 512 rather than the more common value 2048.
+ * need be only one backing file per LUN.
  *
  * Requirements are modest; only a bulk-in and a bulk-out endpoint are
  * needed (an interrupt-out endpoint is also needed for CBI).  The memory
@@ -461,7 +460,6 @@
 
 	struct fsg_buffhd	*next_buffhd_to_fill;
 	struct fsg_buffhd	*next_buffhd_to_drain;
-	struct fsg_buffhd	buffhds[FSG_NUM_BUFFERS];
 
 	int			thread_wakeup_needed;
 	struct completion	thread_notifier;
@@ -488,6 +486,8 @@
 	unsigned int		nluns;
 	struct fsg_lun		*luns;
 	struct fsg_lun		*curlun;
+	/* Must be the last entry */
+	struct fsg_buffhd	buffhds[];
 };
 
 typedef void (*fsg_routine_t)(struct fsg_dev *);
@@ -1136,7 +1136,6 @@
 	u32			amount_left;
 	loff_t			file_offset, file_offset_tmp;
 	unsigned int		amount;
-	unsigned int		partial_page;
 	ssize_t			nread;
 
 	/* Get the starting Logical Block Address and check that it's
@@ -1158,7 +1157,7 @@
 		curlun->sense_data = SS_LOGICAL_BLOCK_ADDRESS_OUT_OF_RANGE;
 		return -EINVAL;
 	}
-	file_offset = ((loff_t) lba) << 9;
+	file_offset = ((loff_t) lba) << curlun->blkbits;
 
 	/* Carry out the file reads */
 	amount_left = fsg->data_size_from_cmnd;
@@ -1171,17 +1170,10 @@
 		 * Try to read the remaining amount.
 		 * But don't read more than the buffer size.
 		 * And don't try to read past the end of the file.
-		 * Finally, if we're not at a page boundary, don't read past
-		 *	the next page.
-		 * If this means reading 0 then we were asked to read past
-		 *	the end of file. */
+		 */
 		amount = min((unsigned int) amount_left, mod_data.buflen);
 		amount = min((loff_t) amount,
 				curlun->file_length - file_offset);
-		partial_page = file_offset & (PAGE_CACHE_SIZE - 1);
-		if (partial_page > 0)
-			amount = min(amount, (unsigned int) PAGE_CACHE_SIZE -
-					partial_page);
 
 		/* Wait for the next buffer to become available */
 		bh = fsg->next_buffhd_to_fill;
@@ -1196,7 +1188,7 @@
 		if (amount == 0) {
 			curlun->sense_data =
 					SS_LOGICAL_BLOCK_ADDRESS_OUT_OF_RANGE;
-			curlun->sense_data_info = file_offset >> 9;
+			curlun->sense_data_info = file_offset >> curlun->blkbits;
 			curlun->info_valid = 1;
 			bh->inreq->length = 0;
 			bh->state = BUF_STATE_FULL;
@@ -1221,18 +1213,23 @@
 		} else if (nread < amount) {
 			LDBG(curlun, "partial file read: %d/%u\n",
 					(int) nread, amount);
-			nread -= (nread & 511);	// Round down to a block
+			nread = round_down(nread, curlun->blksize);
 		}
 		file_offset  += nread;
 		amount_left  -= nread;
 		fsg->residue -= nread;
+
+		/* Except at the end of the transfer, nread will be
+		 * equal to the buffer size, which is divisible by the
+		 * bulk-in maxpacket size.
+		 */
 		bh->inreq->length = nread;
 		bh->state = BUF_STATE_FULL;
 
 		/* If an error occurred, report it and its position */
 		if (nread < amount) {
 			curlun->sense_data = SS_UNRECOVERED_READ_ERROR;
-			curlun->sense_data_info = file_offset >> 9;
+			curlun->sense_data_info = file_offset >> curlun->blkbits;
 			curlun->info_valid = 1;
 			break;
 		}
@@ -1262,7 +1259,6 @@
 	u32			amount_left_to_req, amount_left_to_write;
 	loff_t			usb_offset, file_offset, file_offset_tmp;
 	unsigned int		amount;
-	unsigned int		partial_page;
 	ssize_t			nwritten;
 	int			rc;
 
@@ -1303,7 +1299,7 @@
 
 	/* Carry out the file writes */
 	get_some_more = 1;
-	file_offset = usb_offset = ((loff_t) lba) << 9;
+	file_offset = usb_offset = ((loff_t) lba) << curlun->blkbits;
 	amount_left_to_req = amount_left_to_write = fsg->data_size_from_cmnd;
 
 	while (amount_left_to_write > 0) {
@@ -1313,38 +1309,20 @@
 		if (bh->state == BUF_STATE_EMPTY && get_some_more) {
 
 			/* Figure out how much we want to get:
-			 * Try to get the remaining amount.
-			 * But don't get more than the buffer size.
-			 * And don't try to go past the end of the file.
-			 * If we're not at a page boundary,
-			 *	don't go past the next page.
-			 * If this means getting 0, then we were asked
-			 *	to write past the end of file.
-			 * Finally, round down to a block boundary. */
+			 * Try to get the remaining amount,
+			 * but not more than the buffer size.
+			 */
 			amount = min(amount_left_to_req, mod_data.buflen);
-			amount = min((loff_t) amount, curlun->file_length -
-					usb_offset);
-			partial_page = usb_offset & (PAGE_CACHE_SIZE - 1);
-			if (partial_page > 0)
-				amount = min(amount,
-	(unsigned int) PAGE_CACHE_SIZE - partial_page);
 
-			if (amount == 0) {
+			/* Beyond the end of the backing file? */
+			if (usb_offset >= curlun->file_length) {
 				get_some_more = 0;
 				curlun->sense_data =
 					SS_LOGICAL_BLOCK_ADDRESS_OUT_OF_RANGE;
-				curlun->sense_data_info = usb_offset >> 9;
+				curlun->sense_data_info = usb_offset >> curlun->blkbits;
 				curlun->info_valid = 1;
 				continue;
 			}
-			amount -= (amount & 511);
-			if (amount == 0) {
-
-				/* Why were we were asked to transfer a
-				 * partial block? */
-				get_some_more = 0;
-				continue;
-			}
 
 			/* Get the next buffer */
 			usb_offset += amount;
@@ -1353,8 +1331,10 @@
 			if (amount_left_to_req == 0)
 				get_some_more = 0;
 
-			/* amount is always divisible by 512, hence by
-			 * the bulk-out maxpacket size */
+			/* Except at the end of the transfer, amount will be
+			 * equal to the buffer size, which is divisible by
+			 * the bulk-out maxpacket size.
+			 */
 			bh->outreq->length = bh->bulk_out_intended_length =
 					amount;
 			bh->outreq->short_not_ok = 1;
@@ -1376,7 +1356,7 @@
 			/* Did something go wrong with the transfer? */
 			if (bh->outreq->status != 0) {
 				curlun->sense_data = SS_COMMUNICATION_FAILURE;
-				curlun->sense_data_info = file_offset >> 9;
+				curlun->sense_data_info = file_offset >> curlun->blkbits;
 				curlun->info_valid = 1;
 				break;
 			}
@@ -1390,6 +1370,11 @@
 				amount = curlun->file_length - file_offset;
 			}
 
+			/* Don't write a partial block */
+			amount = round_down(amount, curlun->blksize);
+			if (amount == 0)
+				goto empty_write;
+
 			/* Perform the write */
 			file_offset_tmp = file_offset;
 			nwritten = vfs_write(curlun->filp,
@@ -1408,8 +1393,7 @@
 			} else if (nwritten < amount) {
 				LDBG(curlun, "partial file write: %d/%u\n",
 						(int) nwritten, amount);
-				nwritten -= (nwritten & 511);
-						// Round down to a block
+				nwritten = round_down(nwritten, curlun->blksize);
 			}
 			file_offset += nwritten;
 			amount_left_to_write -= nwritten;
@@ -1418,11 +1402,12 @@
 			/* If an error occurred, report it and its position */
 			if (nwritten < amount) {
 				curlun->sense_data = SS_WRITE_ERROR;
-				curlun->sense_data_info = file_offset >> 9;
+				curlun->sense_data_info = file_offset >> curlun->blkbits;
 				curlun->info_valid = 1;
 				break;
 			}
 
+ empty_write:
 			/* Did the host decide to stop early? */
 			if (bh->outreq->actual != bh->outreq->length) {
 				fsg->short_packet_received = 1;
@@ -1500,8 +1485,8 @@
 		return -EIO;		// No default reply
 
 	/* Prepare to carry out the file verify */
-	amount_left = verification_length << 9;
-	file_offset = ((loff_t) lba) << 9;
+	amount_left = verification_length << curlun->blkbits;
+	file_offset = ((loff_t) lba) << curlun->blkbits;
 
 	/* Write out all the dirty buffers before invalidating them */
 	fsg_lun_fsync_sub(curlun);
@@ -1519,15 +1504,14 @@
 		 * Try to read the remaining amount, but not more than
 		 * the buffer size.
 		 * And don't try to read past the end of the file.
-		 * If this means reading 0 then we were asked to read
-		 * past the end of file. */
+		 */
 		amount = min((unsigned int) amount_left, mod_data.buflen);
 		amount = min((loff_t) amount,
 				curlun->file_length - file_offset);
 		if (amount == 0) {
 			curlun->sense_data =
 					SS_LOGICAL_BLOCK_ADDRESS_OUT_OF_RANGE;
-			curlun->sense_data_info = file_offset >> 9;
+			curlun->sense_data_info = file_offset >> curlun->blkbits;
 			curlun->info_valid = 1;
 			break;
 		}
@@ -1550,11 +1534,11 @@
 		} else if (nread < amount) {
 			LDBG(curlun, "partial file verify: %d/%u\n",
 					(int) nread, amount);
-			nread -= (nread & 511);	// Round down to a sector
+			nread = round_down(nread, curlun->blksize);
 		}
 		if (nread == 0) {
 			curlun->sense_data = SS_UNRECOVERED_READ_ERROR;
-			curlun->sense_data_info = file_offset >> 9;
+			curlun->sense_data_info = file_offset >> curlun->blkbits;
 			curlun->info_valid = 1;
 			break;
 		}
@@ -1668,7 +1652,7 @@
 
 	put_unaligned_be32(curlun->num_sectors - 1, &buf[0]);
 						/* Max logical block */
-	put_unaligned_be32(512, &buf[4]);	/* Block length */
+	put_unaligned_be32(curlun->blksize, &buf[4]);	/* Block length */
 	return 8;
 }
 
@@ -1890,7 +1874,7 @@
 
 	put_unaligned_be32(curlun->num_sectors, &buf[0]);
 						/* Number of blocks */
-	put_unaligned_be32(512, &buf[4]);	/* Block length */
+	put_unaligned_be32(curlun->blksize, &buf[4]);	/* Block length */
 	buf[4] = 0x02;				/* Current capacity */
 	return 12;
 }
@@ -1983,8 +1967,10 @@
 			amount = min(fsg->usb_amount_left,
 					(u32) mod_data.buflen);
 
-			/* amount is always divisible by 512, hence by
-			 * the bulk-out maxpacket size */
+			/* Except at the end of the transfer, amount will be
+			 * equal to the buffer size, which is divisible by
+			 * the bulk-out maxpacket size.
+			 */
 			bh->outreq->length = bh->bulk_out_intended_length =
 					amount;
 			bh->outreq->short_not_ok = 1;
@@ -2415,7 +2401,7 @@
 
 	case READ_6:
 		i = fsg->cmnd[4];
-		fsg->data_size_from_cmnd = (i == 0 ? 256 : i) << 9;
+		fsg->data_size_from_cmnd = (i == 0 ? 256 : i) << fsg->curlun->blkbits;
 		if ((reply = check_command(fsg, 6, DATA_DIR_TO_HOST,
 				(7<<1) | (1<<4), 1,
 				"READ(6)")) == 0)
@@ -2424,7 +2410,7 @@
 
 	case READ_10:
 		fsg->data_size_from_cmnd =
-				get_unaligned_be16(&fsg->cmnd[7]) << 9;
+				get_unaligned_be16(&fsg->cmnd[7]) << fsg->curlun->blkbits;
 		if ((reply = check_command(fsg, 10, DATA_DIR_TO_HOST,
 				(1<<1) | (0xf<<2) | (3<<7), 1,
 				"READ(10)")) == 0)
@@ -2433,7 +2419,7 @@
 
 	case READ_12:
 		fsg->data_size_from_cmnd =
-				get_unaligned_be32(&fsg->cmnd[6]) << 9;
+				get_unaligned_be32(&fsg->cmnd[6]) << fsg->curlun->blkbits;
 		if ((reply = check_command(fsg, 12, DATA_DIR_TO_HOST,
 				(1<<1) | (0xf<<2) | (0xf<<6), 1,
 				"READ(12)")) == 0)
@@ -2519,7 +2505,7 @@
 
 	case WRITE_6:
 		i = fsg->cmnd[4];
-		fsg->data_size_from_cmnd = (i == 0 ? 256 : i) << 9;
+		fsg->data_size_from_cmnd = (i == 0 ? 256 : i) << fsg->curlun->blkbits;
 		if ((reply = check_command(fsg, 6, DATA_DIR_FROM_HOST,
 				(7<<1) | (1<<4), 1,
 				"WRITE(6)")) == 0)
@@ -2528,7 +2514,7 @@
 
 	case WRITE_10:
 		fsg->data_size_from_cmnd =
-				get_unaligned_be16(&fsg->cmnd[7]) << 9;
+				get_unaligned_be16(&fsg->cmnd[7]) << fsg->curlun->blkbits;
 		if ((reply = check_command(fsg, 10, DATA_DIR_FROM_HOST,
 				(1<<1) | (0xf<<2) | (3<<7), 1,
 				"WRITE(10)")) == 0)
@@ -2537,7 +2523,7 @@
 
 	case WRITE_12:
 		fsg->data_size_from_cmnd =
-				get_unaligned_be32(&fsg->cmnd[6]) << 9;
+				get_unaligned_be32(&fsg->cmnd[6]) << fsg->curlun->blkbits;
 		if ((reply = check_command(fsg, 12, DATA_DIR_FROM_HOST,
 				(1<<1) | (0xf<<2) | (0xf<<6), 1,
 				"WRITE(12)")) == 0)
@@ -2752,7 +2738,7 @@
 
 reset:
 	/* Deallocate the requests */
-	for (i = 0; i < FSG_NUM_BUFFERS; ++i) {
+	for (i = 0; i < fsg_num_buffers; ++i) {
 		struct fsg_buffhd *bh = &fsg->buffhds[i];
 
 		if (bh->inreq) {
@@ -2813,7 +2799,7 @@
 	}
 
 	/* Allocate the requests */
-	for (i = 0; i < FSG_NUM_BUFFERS; ++i) {
+	for (i = 0; i < fsg_num_buffers; ++i) {
 		struct fsg_buffhd	*bh = &fsg->buffhds[i];
 
 		if ((rc = alloc_request(fsg, fsg->bulk_in, &bh->inreq)) != 0)
@@ -2902,7 +2888,7 @@
 	/* Cancel all the pending transfers */
 	if (fsg->intreq_busy)
 		usb_ep_dequeue(fsg->intr_in, fsg->intreq);
-	for (i = 0; i < FSG_NUM_BUFFERS; ++i) {
+	for (i = 0; i < fsg_num_buffers; ++i) {
 		bh = &fsg->buffhds[i];
 		if (bh->inreq_busy)
 			usb_ep_dequeue(fsg->bulk_in, bh->inreq);
@@ -2913,7 +2899,7 @@
 	/* Wait until everything is idle */
 	for (;;) {
 		num_active = fsg->intreq_busy;
-		for (i = 0; i < FSG_NUM_BUFFERS; ++i) {
+		for (i = 0; i < fsg_num_buffers; ++i) {
 			bh = &fsg->buffhds[i];
 			num_active += bh->inreq_busy + bh->outreq_busy;
 		}
@@ -2935,7 +2921,7 @@
 	 * state, and the exception.  Then invoke the handler. */
 	spin_lock_irq(&fsg->lock);
 
-	for (i = 0; i < FSG_NUM_BUFFERS; ++i) {
+	for (i = 0; i < fsg_num_buffers; ++i) {
 		bh = &fsg->buffhds[i];
 		bh->state = BUF_STATE_EMPTY;
 	}
@@ -3165,7 +3151,7 @@
 	}
 
 	/* Free the data buffers */
-	for (i = 0; i < FSG_NUM_BUFFERS; ++i)
+	for (i = 0; i < fsg_num_buffers; ++i)
 		kfree(fsg->buffhds[i].buf);
 
 	/* Free the request and buffer for endpoint 0 */
@@ -3453,7 +3439,7 @@
 	req->complete = ep0_complete;
 
 	/* Allocate the data buffers */
-	for (i = 0; i < FSG_NUM_BUFFERS; ++i) {
+	for (i = 0; i < fsg_num_buffers; ++i) {
 		struct fsg_buffhd	*bh = &fsg->buffhds[i];
 
 		/* Allocate for the bulk-in endpoint.  We assume that
@@ -3464,7 +3450,7 @@
 			goto out;
 		bh->next = bh + 1;
 	}
-	fsg->buffhds[FSG_NUM_BUFFERS - 1].next = &fsg->buffhds[0];
+	fsg->buffhds[fsg_num_buffers - 1].next = &fsg->buffhds[0];
 
 	/* This should reflect the actual gadget power source */
 	usb_gadget_set_selfpowered(gadget);
@@ -3580,7 +3566,9 @@
 {
 	struct fsg_dev		*fsg;
 
-	fsg = kzalloc(sizeof *fsg, GFP_KERNEL);
+	fsg = kzalloc(sizeof *fsg +
+		      fsg_num_buffers * sizeof *(fsg->buffhds), GFP_KERNEL);
+
 	if (!fsg)
 		return -ENOMEM;
 	spin_lock_init(&fsg->lock);
@@ -3598,6 +3586,10 @@
 	int		rc;
 	struct fsg_dev	*fsg;
 
+	rc = fsg_num_buffers_validate();
+	if (rc != 0)
+		return rc;
+
 	if ((rc = fsg_alloc()) != 0)
 		return rc;
 	fsg = the_fsg;
diff --git a/drivers/usb/gadget/storage_common.c b/drivers/usb/gadget/storage_common.c
index 571d973..5236262 100644
--- a/drivers/usb/gadget/storage_common.c
+++ b/drivers/usb/gadget/storage_common.c
@@ -43,6 +43,12 @@
  * characters rather then a pointer to void.
  */
 
+/*
+ * When USB_GADGET_DEBUG_FILES is defined the module param num_buffers
+ * sets the number of pipeline buffers (length of the fsg_buffhd array).
+ * The valid range of num_buffers is: num >= 2 && num <= 4.
+ */
+
 
 #include <linux/usb/storage.h>
 #include <scsi/scsi.h>
@@ -238,6 +244,8 @@
 	u32		sense_data_info;
 	u32		unit_attention_data;
 
+	unsigned int	blkbits;	/* Bits of logical block size of bound block device */
+	unsigned int	blksize;	/* logical block size of bound block device */
 	struct device	dev;
 };
 
@@ -253,8 +261,31 @@
 #define EP0_BUFSIZE	256
 #define DELAYED_STATUS	(EP0_BUFSIZE + 999)	/* An impossibly large value */
 
-/* Number of buffers we will use.  2 is enough for double-buffering */
-#define FSG_NUM_BUFFERS	2
+#ifdef CONFIG_USB_GADGET_DEBUG_FILES
+
+static unsigned int fsg_num_buffers = CONFIG_USB_GADGET_STORAGE_NUM_BUFFERS;
+module_param_named(num_buffers, fsg_num_buffers, uint, S_IRUGO);
+MODULE_PARM_DESC(num_buffers, "Number of pipeline buffers");
+
+#else
+
+/*
+ * Number of buffers we will use.
+ * 2 is usually enough for good buffering pipeline
+ */
+#define fsg_num_buffers	CONFIG_USB_GADGET_STORAGE_NUM_BUFFERS
+
+#endif /* CONFIG_USB_DEBUG */
+
+/* check if fsg_num_buffers is within a valid range */
+static inline int fsg_num_buffers_validate(void)
+{
+	if (fsg_num_buffers >= 2 && fsg_num_buffers <= 4)
+		return 0;
+	pr_err("fsg_num_buffers %u is out of range (%d to %d)\n",
+	       fsg_num_buffers, 2 ,4);
+	return -EINVAL;
+}
 
 /* Default size of buffer length. */
 #define FSG_BUFLEN	((u32)16384)
@@ -571,13 +602,24 @@
 		rc = (int) size;
 		goto out;
 	}
-	num_sectors = size >> 9;	/* File size in 512-byte blocks */
+
+	if (curlun->cdrom) {
+		curlun->blksize = 2048;
+		curlun->blkbits = 11;
+	} else if (inode->i_bdev) {
+		curlun->blksize = bdev_logical_block_size(inode->i_bdev);
+		curlun->blkbits = blksize_bits(curlun->blksize);
+	} else {
+		curlun->blksize = 512;
+		curlun->blkbits = 9;
+	}
+
+	num_sectors = size >> curlun->blkbits; /* File size in logic-block-size blocks */
 	min_sectors = 1;
 	if (curlun->cdrom) {
-		num_sectors &= ~3;	/* Reduce to a multiple of 2048 */
-		min_sectors = 300*4;	/* Smallest track is 300 frames */
-		if (num_sectors >= 256*60*75*4) {
-			num_sectors = (256*60*75 - 1) * 4;
+		min_sectors = 300;	/* Smallest track is 300 frames */
+		if (num_sectors >= 256*60*75) {
+			num_sectors = 256*60*75 - 1;
 			LINFO(curlun, "file too big: %s\n", filename);
 			LINFO(curlun, "using only first %d blocks\n",
 					(int) num_sectors);
diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c
index 3f79e7f..b08b9ca 100644
--- a/drivers/usb/musb/musb_gadget.c
+++ b/drivers/usb/musb/musb_gadget.c
@@ -634,6 +634,7 @@
 	u16			len;
 	u16			csr = musb_readw(epio, MUSB_RXCSR);
 	struct musb_hw_ep	*hw_ep = &musb->endpoints[epnum];
+	u8			use_mode_1;
 
 	if (hw_ep->is_shared_fifo)
 		musb_ep = &hw_ep->ep_in;
@@ -683,6 +684,18 @@
 
 	if (csr & MUSB_RXCSR_RXPKTRDY) {
 		len = musb_readw(epio, MUSB_RXCOUNT);
+
+		/*
+		 * Enable Mode 1 on RX transfers only when short_not_ok flag
+		 * is set. Currently short_not_ok flag is set only from
+		 * file_storage and f_mass_storage drivers
+		 */
+
+		if (request->short_not_ok && len == musb_ep->packet_sz)
+			use_mode_1 = 1;
+		else
+			use_mode_1 = 0;
+
 		if (request->actual < request->length) {
 #ifdef CONFIG_USB_INVENTRA_DMA
 			if (is_buffer_mapped(req)) {
@@ -714,37 +727,41 @@
 	 * then becomes usable as a runtime "use mode 1" hint...
 	 */
 
-				csr |= MUSB_RXCSR_DMAENAB;
-#ifdef USE_MODE1
-				csr |= MUSB_RXCSR_AUTOCLEAR;
-				/* csr |= MUSB_RXCSR_DMAMODE; */
-
-				/* this special sequence (enabling and then
-				 * disabling MUSB_RXCSR_DMAMODE) is required
-				 * to get DMAReq to activate
-				 */
-				musb_writew(epio, MUSB_RXCSR,
-					csr | MUSB_RXCSR_DMAMODE);
-#else
-				if (!musb_ep->hb_mult &&
-					musb_ep->hw_ep->rx_double_buffered)
+				/* Experimental: Mode1 works with mass storage use cases */
+				if (use_mode_1) {
 					csr |= MUSB_RXCSR_AUTOCLEAR;
-#endif
-				musb_writew(epio, MUSB_RXCSR, csr);
+					musb_writew(epio, MUSB_RXCSR, csr);
+					csr |= MUSB_RXCSR_DMAENAB;
+					musb_writew(epio, MUSB_RXCSR, csr);
+
+					/*
+					 * this special sequence (enabling and then
+					 * disabling MUSB_RXCSR_DMAMODE) is required
+					 * to get DMAReq to activate
+					 */
+					musb_writew(epio, MUSB_RXCSR,
+						csr | MUSB_RXCSR_DMAMODE);
+					musb_writew(epio, MUSB_RXCSR, csr);
+
+				} else {
+					if (!musb_ep->hb_mult &&
+						musb_ep->hw_ep->rx_double_buffered)
+						csr |= MUSB_RXCSR_AUTOCLEAR;
+					csr |= MUSB_RXCSR_DMAENAB;
+					musb_writew(epio, MUSB_RXCSR, csr);
+				}
 
 				if (request->actual < request->length) {
 					int transfer_size = 0;
-#ifdef USE_MODE1
-					transfer_size = min(request->length - request->actual,
-							channel->max_len);
-#else
-					transfer_size = min(request->length - request->actual,
-							(unsigned)len);
-#endif
-					if (transfer_size <= musb_ep->packet_sz)
-						musb_ep->dma->desired_mode = 0;
-					else
+					if (use_mode_1) {
+						transfer_size = min(request->length - request->actual,
+								channel->max_len);
 						musb_ep->dma->desired_mode = 1;
+					} else {
+						transfer_size = min(request->length - request->actual,
+								(unsigned)len);
+						musb_ep->dma->desired_mode = 0;
+					}
 
 					use_dma = c->channel_program(
 							channel,
diff --git a/include/linux/platform_data/dwc3-omap.h b/include/linux/platform_data/dwc3-omap.h
new file mode 100644
index 0000000..ada4012
--- /dev/null
+++ b/include/linux/platform_data/dwc3-omap.h
@@ -0,0 +1,47 @@
+/**
+ * dwc3-omap.h - OMAP Specific Glue layer, header.
+ *
+ * Copyright (C) 2010-2011 Texas Instruments Incorporated - http://www.ti.com
+ * All rights reserved.
+ *
+ * Author: Felipe Balbi <balbi@ti.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The names of the above-listed copyright holders may not be used
+ *    to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2, as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+enum dwc3_omap_utmi_mode {
+	DWC3_OMAP_UTMI_MODE_UNKNOWN = 0,
+	DWC3_OMAP_UTMI_MODE_HW,
+	DWC3_OMAP_UTMI_MODE_SW,
+};
+
+struct dwc3_omap_data {
+	enum dwc3_omap_utmi_mode	utmi_mode;
+};