Merge branch 'uprobes/core' of git://git.kernel.org/pub/scm/linux/kernel/git/oleg/misc into perf/core

Pull uprobes fixes + cleanups from Oleg Nesterov.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
index 34f5110..dff1f48 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci
+++ b/Documentation/ABI/testing/sysfs-bus-pci
@@ -210,3 +210,15 @@
 		firmware assigned instance number of the PCI
 		device that can help in understanding the firmware
 		intended order of the PCI device.
+
+What:		/sys/bus/pci/devices/.../d3cold_allowed
+Date:		July 2012
+Contact:	Huang Ying <ying.huang@intel.com>
+Description:
+		d3cold_allowed is a bit that controls whether the corresponding
+		PCI device can be put into the D3Cold state.  If it is cleared,
+		the device will never be put into the D3Cold state.  If it is
+		set, the device may be put into the D3Cold state when the other
+		requirements are also satisfied.  Reading this attribute shows
+		the current value of the d3cold_allowed bit.  Writing this
+		attribute sets the value of the d3cold_allowed bit.
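
For illustration, the new attribute can be driven from user space with ordinary file I/O; a minimal sketch, where the device address merely stands in for the "..." in the path above:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Placeholder device address; substitute a real one from lspci -D. */
	const char *path = "/sys/bus/pci/devices/0000:00:1c.0/d3cold_allowed";
	char val;
	int fd = open(path, O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (read(fd, &val, 1) == 1)
		printf("d3cold_allowed is currently %c\n", val);
	/* Clear the bit so the device is never put into D3cold. */
	if (pwrite(fd, "0", 1, 0) != 1)
		perror("pwrite");
	close(fd);
	return 0;
}
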
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index afaff31..f4d8c71 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -579,7 +579,7 @@
 ----------------------------
 
 What:	at91-mci driver ("CONFIG_MMC_AT91")
-When:	3.7
+When:	3.8
 Why:	There are two mci drivers: at91-mci and atmel-mci. The PDC support
 	was added to atmel-mci as a first step to support more chips.
 	Then at91-mci was kept only for old IP versions (on at91rm9200 and
diff --git a/Documentation/watchdog/src/watchdog-test.c b/Documentation/watchdog/src/watchdog-test.c
index 73ff5cc..3da8229 100644
--- a/Documentation/watchdog/src/watchdog-test.c
+++ b/Documentation/watchdog/src/watchdog-test.c
@@ -31,7 +31,7 @@
  * or "-e" to enable the card.
  */
 
-void term(int sig)
+static void term(int sig)
 {
     close(fd);
     fprintf(stderr, "Stopping watchdog ticks...\n");
diff --git a/Makefile b/Makefile
index afbbeda..843f2f6 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 3
 PATCHLEVEL = 6
 SUBLEVEL = 0
-EXTRAVERSION = -rc3
+EXTRAVERSION = -rc5
 NAME = Saber-toothed Squirrel
 
 # *DOCUMENTATION*
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index c5f9ae5..2f88d8d 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -6,7 +6,7 @@
 	select HAVE_DMA_API_DEBUG
 	select HAVE_IDE if PCI || ISA || PCMCIA
 	select HAVE_DMA_ATTRS
-	select HAVE_DMA_CONTIGUOUS if (CPU_V6 || CPU_V6K || CPU_V7)
+	select HAVE_DMA_CONTIGUOUS if MMU
 	select HAVE_MEMBLOCK
 	select RTC_LIB
 	select SYS_SUPPORTS_APM_EMULATION
diff --git a/arch/arm/boot/dts/at91sam9g25ek.dts b/arch/arm/boot/dts/at91sam9g25ek.dts
index 7829a4d..96514c1 100644
--- a/arch/arm/boot/dts/at91sam9g25ek.dts
+++ b/arch/arm/boot/dts/at91sam9g25ek.dts
@@ -15,7 +15,7 @@
 	compatible = "atmel,at91sam9g25ek", "atmel,at91sam9x5ek", "atmel,at91sam9x5", "atmel,at91sam9";
 
 	chosen {
-		bootargs = "128M console=ttyS0,115200 root=/dev/mtdblock1 rw rootfstype=ubifs ubi.mtd=1 root=ubi0:rootfs";
+		bootargs = "console=ttyS0,115200 root=/dev/mtdblock1 rw rootfstype=ubifs ubi.mtd=1 root=ubi0:rootfs";
 	};
 
 	ahb {
diff --git a/arch/arm/configs/armadillo800eva_defconfig b/arch/arm/configs/armadillo800eva_defconfig
index 7d87184..90610c7 100644
--- a/arch/arm/configs/armadillo800eva_defconfig
+++ b/arch/arm/configs/armadillo800eva_defconfig
@@ -33,7 +33,7 @@
 CONFIG_FORCE_MAX_ZONEORDER=13
 CONFIG_ZBOOT_ROM_TEXT=0x0
 CONFIG_ZBOOT_ROM_BSS=0x0
-CONFIG_CMDLINE="console=tty0 console=ttySC1,115200 earlyprintk=sh-sci.1,115200 ignore_loglevel root=/dev/nfs ip=dhcp nfsroot=,rsize=4096,wsize=4096"
+CONFIG_CMDLINE="console=tty0 console=ttySC1,115200 earlyprintk=sh-sci.1,115200 ignore_loglevel root=/dev/nfs ip=dhcp nfsroot=,rsize=4096,wsize=4096 rw"
 CONFIG_CMDLINE_FORCE=y
 CONFIG_KEXEC=y
 CONFIG_VFP=y
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index 2ae842d..5c44dcb 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -203,6 +203,13 @@
 }
 
 /*
+ * This can be called during early boot to increase the size of the atomic
+ * coherent DMA pool above the default value of 256KiB. It must be called
+ * before postcore_initcall.
+ */
+extern void __init init_dma_coherent_pool_size(unsigned long size);
+
+/*
  * This can be called during boot to increase the size of the consistent
  * DMA region above its default value of 2MB. It must be called before the
  * memory allocator is initialised, i.e. before any core_initcall.
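
A hypothetical board's early init hook would follow the same pattern as the Kirkwood hunk later in this merge; a sketch only, with the board name and pool size chosen for illustration:

#include <linux/init.h>
#include <linux/sizes.h>
#include <linux/dma-mapping.h>

/* Wired up through the board's MACHINE_START(...)->init_early. */
static void __init myboard_init_early(void)
{
	/*
	 * Drivers on this board allocate coherent buffers from atomic
	 * context, so grow the default 256 KiB pool before it is created
	 * by the postcore_initcall.
	 */
	init_dma_coherent_pool_size(SZ_1M);
}
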
diff --git a/arch/arm/mach-at91/at91rm9200_time.c b/arch/arm/mach-at91/at91rm9200_time.c
index 104ca40..aaa443b 100644
--- a/arch/arm/mach-at91/at91rm9200_time.c
+++ b/arch/arm/mach-at91/at91rm9200_time.c
@@ -197,7 +197,7 @@
 	at91_st_read(AT91_ST_SR);
 
 	/* Make IRQs happen for the system timer */
-	setup_irq(AT91_ID_SYS, &at91rm9200_timer_irq);
+	setup_irq(NR_IRQS_LEGACY + AT91_ID_SYS, &at91rm9200_timer_irq);
 
 	/* The 32KiHz "Slow Clock" (tick every 30517.58 nanoseconds) is used
 	 * directly for the clocksource and all clockevents, after adjusting
diff --git a/arch/arm/mach-at91/at91sam9260_devices.c b/arch/arm/mach-at91/at91sam9260_devices.c
index 7b9c2ba..bce572a 100644
--- a/arch/arm/mach-at91/at91sam9260_devices.c
+++ b/arch/arm/mach-at91/at91sam9260_devices.c
@@ -726,6 +726,8 @@
 		.flags	= IORESOURCE_MEM,
 	}, {
 		.flags	= IORESOURCE_MEM,
+	}, {
+		.flags  = IORESOURCE_IRQ,
 	},
 };
 
@@ -744,10 +746,12 @@
 	 * The second resource is needed:
 	 * GPBR will serve as the storage for RTC time offset
 	 */
-	at91sam9260_rtt_device.num_resources = 2;
+	at91sam9260_rtt_device.num_resources = 3;
 	rtt_resources[1].start = AT91SAM9260_BASE_GPBR +
 				 4 * CONFIG_RTC_DRV_AT91SAM9_GPBR;
 	rtt_resources[1].end = rtt_resources[1].start + 3;
+	rtt_resources[2].start = NR_IRQS_LEGACY + AT91_ID_SYS;
+	rtt_resources[2].end = NR_IRQS_LEGACY + AT91_ID_SYS;
 }
 #else
 static void __init at91_add_device_rtt_rtc(void)
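
With the extra IORESOURCE_IRQ entry above, the rtc-at91sam9 driver can pick its interrupt up from the platform device instead of hard-coding AT91_ID_SYS. A sketch of the consumer side, with the probe routine and handler named here only for illustration (the real driver change is not part of this merge):

#include <linux/interrupt.h>
#include <linux/platform_device.h>

static irqreturn_t at91_rtt_interrupt(int irq, void *dev_id)
{
	/* Acknowledge and handle the RTT alarm/update interrupt here. */
	return IRQ_HANDLED;
}

static int at91_rtt_probe(struct platform_device *pdev)
{
	struct resource *r;

	/* Third resource added above: the (shared) system interrupt. */
	r = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
	if (!r)
		return -ENODEV;

	return request_irq(r->start, at91_rtt_interrupt, IRQF_SHARED,
			   "rtc-at91sam9", pdev);
}
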
diff --git a/arch/arm/mach-at91/at91sam9261_devices.c b/arch/arm/mach-at91/at91sam9261_devices.c
index 8df5c1bd..bc2590d 100644
--- a/arch/arm/mach-at91/at91sam9261_devices.c
+++ b/arch/arm/mach-at91/at91sam9261_devices.c
@@ -609,6 +609,8 @@
 		.flags	= IORESOURCE_MEM,
 	}, {
 		.flags	= IORESOURCE_MEM,
+	}, {
+		.flags  = IORESOURCE_IRQ,
 	}
 };
 
@@ -626,10 +628,12 @@
 	 * The second resource is needed:
 	 * GPBR will serve as the storage for RTC time offset
 	 */
-	at91sam9261_rtt_device.num_resources = 2;
+	at91sam9261_rtt_device.num_resources = 3;
 	rtt_resources[1].start = AT91SAM9261_BASE_GPBR +
 				 4 * CONFIG_RTC_DRV_AT91SAM9_GPBR;
 	rtt_resources[1].end = rtt_resources[1].start + 3;
+	rtt_resources[2].start = NR_IRQS_LEGACY + AT91_ID_SYS;
+	rtt_resources[2].end = NR_IRQS_LEGACY + AT91_ID_SYS;
 }
 #else
 static void __init at91_add_device_rtt_rtc(void)
diff --git a/arch/arm/mach-at91/at91sam9263_devices.c b/arch/arm/mach-at91/at91sam9263_devices.c
index eb6bbf8..9b6ca73 100644
--- a/arch/arm/mach-at91/at91sam9263_devices.c
+++ b/arch/arm/mach-at91/at91sam9263_devices.c
@@ -990,6 +990,8 @@
 		.flags	= IORESOURCE_MEM,
 	}, {
 		.flags	= IORESOURCE_MEM,
+	}, {
+		.flags  = IORESOURCE_IRQ,
 	}
 };
 
@@ -1006,6 +1008,8 @@
 		.flags	= IORESOURCE_MEM,
 	}, {
 		.flags	= IORESOURCE_MEM,
+	}, {
+		.flags  = IORESOURCE_IRQ,
 	}
 };
 
@@ -1027,14 +1031,14 @@
 		 * The second resource is needed only for the chosen RTT:
 		 * GPBR will serve as the storage for RTC time offset
 		 */
-		at91sam9263_rtt0_device.num_resources = 2;
+		at91sam9263_rtt0_device.num_resources = 3;
 		at91sam9263_rtt1_device.num_resources = 1;
 		pdev = &at91sam9263_rtt0_device;
 		r = rtt0_resources;
 		break;
 	case 1:
 		at91sam9263_rtt0_device.num_resources = 1;
-		at91sam9263_rtt1_device.num_resources = 2;
+		at91sam9263_rtt1_device.num_resources = 3;
 		pdev = &at91sam9263_rtt1_device;
 		r = rtt1_resources;
 		break;
@@ -1047,6 +1051,8 @@
 	pdev->name = "rtc-at91sam9";
 	r[1].start = AT91SAM9263_BASE_GPBR + 4 * CONFIG_RTC_DRV_AT91SAM9_GPBR;
 	r[1].end = r[1].start + 3;
+	r[2].start = NR_IRQS_LEGACY + AT91_ID_SYS;
+	r[2].end = NR_IRQS_LEGACY + AT91_ID_SYS;
 }
 #else
 static void __init at91_add_device_rtt_rtc(void)
diff --git a/arch/arm/mach-at91/at91sam9g45_devices.c b/arch/arm/mach-at91/at91sam9g45_devices.c
index 0607399..1b47319 100644
--- a/arch/arm/mach-at91/at91sam9g45_devices.c
+++ b/arch/arm/mach-at91/at91sam9g45_devices.c
@@ -1293,6 +1293,8 @@
 		.flags	= IORESOURCE_MEM,
 	}, {
 		.flags	= IORESOURCE_MEM,
+	}, {
+		.flags  = IORESOURCE_IRQ,
 	}
 };
 
@@ -1310,10 +1312,12 @@
 	 * The second resource is needed:
 	 * GPBR will serve as the storage for RTC time offset
 	 */
-	at91sam9g45_rtt_device.num_resources = 2;
+	at91sam9g45_rtt_device.num_resources = 3;
 	rtt_resources[1].start = AT91SAM9G45_BASE_GPBR +
 				 4 * CONFIG_RTC_DRV_AT91SAM9_GPBR;
 	rtt_resources[1].end = rtt_resources[1].start + 3;
+	rtt_resources[2].start = NR_IRQS_LEGACY + AT91_ID_SYS;
+	rtt_resources[2].end = NR_IRQS_LEGACY + AT91_ID_SYS;
 }
 #else
 static void __init at91_add_device_rtt_rtc(void)
diff --git a/arch/arm/mach-at91/at91sam9rl_devices.c b/arch/arm/mach-at91/at91sam9rl_devices.c
index f09fff9..b3d365d 100644
--- a/arch/arm/mach-at91/at91sam9rl_devices.c
+++ b/arch/arm/mach-at91/at91sam9rl_devices.c
@@ -688,6 +688,8 @@
 		.flags	= IORESOURCE_MEM,
 	}, {
 		.flags	= IORESOURCE_MEM,
+	}, {
+		.flags  = IORESOURCE_IRQ,
 	}
 };
 
@@ -705,10 +707,12 @@
 	 * The second resource is needed:
 	 * GPBR will serve as the storage for RTC time offset
 	 */
-	at91sam9rl_rtt_device.num_resources = 2;
+	at91sam9rl_rtt_device.num_resources = 3;
 	rtt_resources[1].start = AT91SAM9RL_BASE_GPBR +
 				 4 * CONFIG_RTC_DRV_AT91SAM9_GPBR;
 	rtt_resources[1].end = rtt_resources[1].start + 3;
+	rtt_resources[2].start = NR_IRQS_LEGACY + AT91_ID_SYS;
+	rtt_resources[2].end = NR_IRQS_LEGACY + AT91_ID_SYS;
 }
 #else
 static void __init at91_add_device_rtt_rtc(void)
diff --git a/arch/arm/mach-at91/clock.c b/arch/arm/mach-at91/clock.c
index de2ec6b..188c829 100644
--- a/arch/arm/mach-at91/clock.c
+++ b/arch/arm/mach-at91/clock.c
@@ -63,6 +63,12 @@
 
 #define cpu_has_300M_plla()	(cpu_is_at91sam9g10())
 
+#define cpu_has_240M_plla()	(cpu_is_at91sam9261() \
+				|| cpu_is_at91sam9263() \
+				|| cpu_is_at91sam9rl())
+
+#define cpu_has_210M_plla()	(cpu_is_at91sam9260())
+
 #define cpu_has_pllb()		(!(cpu_is_at91sam9rl() \
 				|| cpu_is_at91sam9g45() \
 				|| cpu_is_at91sam9x5() \
@@ -706,6 +712,12 @@
 	} else if (cpu_has_800M_plla()) {
 		if (plla.rate_hz > 800000000)
 			pll_overclock = true;
+	} else if (cpu_has_240M_plla()) {
+		if (plla.rate_hz > 240000000)
+			pll_overclock = true;
+	} else if (cpu_has_210M_plla()) {
+		if (plla.rate_hz > 210000000)
+			pll_overclock = true;
 	} else {
 		if (plla.rate_hz > 209000000)
 			pll_overclock = true;
diff --git a/arch/arm/mach-gemini/irq.c b/arch/arm/mach-gemini/irq.c
index ca70e5f..020852d 100644
--- a/arch/arm/mach-gemini/irq.c
+++ b/arch/arm/mach-gemini/irq.c
@@ -17,6 +17,7 @@
 #include <linux/sched.h>
 #include <asm/irq.h>
 #include <asm/mach/irq.h>
+#include <asm/system_misc.h>
 #include <mach/hardware.h>
 
 #define IRQ_SOURCE(base_addr)	(base_addr + 0x00)
diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c
index 3226077..1201191 100644
--- a/arch/arm/mach-kirkwood/common.c
+++ b/arch/arm/mach-kirkwood/common.c
@@ -517,6 +517,13 @@
 void __init kirkwood_init_early(void)
 {
 	orion_time_set_base(TIMER_VIRT_BASE);
+
+	/*
+	 * Some Kirkwood devices allocate their coherent buffers from atomic
+	 * context. Increase the size of the atomic coherent pool to make
+	 * sure such allocations won't fail.
+	 */
+	init_dma_coherent_pool_size(SZ_1M);
 }
 
 int kirkwood_tclk;
diff --git a/arch/arm/mach-kirkwood/db88f6281-bp-setup.c b/arch/arm/mach-kirkwood/db88f6281-bp-setup.c
index d933593..be90b7d 100644
--- a/arch/arm/mach-kirkwood/db88f6281-bp-setup.c
+++ b/arch/arm/mach-kirkwood/db88f6281-bp-setup.c
@@ -10,6 +10,7 @@
 
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/sizes.h>
 #include <linux/platform_device.h>
 #include <linux/mtd/partitions.h>
 #include <linux/ata_platform.h>
diff --git a/arch/arm/mach-shmobile/board-armadillo800eva.c b/arch/arm/mach-shmobile/board-armadillo800eva.c
index cf10f92..453a6e5 100644
--- a/arch/arm/mach-shmobile/board-armadillo800eva.c
+++ b/arch/arm/mach-shmobile/board-armadillo800eva.c
@@ -520,13 +520,14 @@
 };
 
 /* GPIO KEY */
-#define GPIO_KEY(c, g, d) { .code = c, .gpio = g, .desc = d, .active_low = 1 }
+#define GPIO_KEY(c, g, d, ...) \
+	{ .code = c, .gpio = g, .desc = d, .active_low = 1, __VA_ARGS__ }
 
 static struct gpio_keys_button gpio_buttons[] = {
-	GPIO_KEY(KEY_POWER,	GPIO_PORT99,	"SW1"),
-	GPIO_KEY(KEY_BACK,	GPIO_PORT100,	"SW2"),
-	GPIO_KEY(KEY_MENU,	GPIO_PORT97,	"SW3"),
-	GPIO_KEY(KEY_HOME,	GPIO_PORT98,	"SW4"),
+	GPIO_KEY(KEY_POWER,	GPIO_PORT99,	"SW3", .wakeup = 1),
+	GPIO_KEY(KEY_BACK,	GPIO_PORT100,	"SW4"),
+	GPIO_KEY(KEY_MENU,	GPIO_PORT97,	"SW5"),
+	GPIO_KEY(KEY_HOME,	GPIO_PORT98,	"SW6"),
 };
 
 static struct gpio_keys_platform_data gpio_key_info = {
@@ -901,8 +902,8 @@
 	&camera_device,
 	&ceu0_device,
 	&fsi_device,
-	&fsi_hdmi_device,
 	&fsi_wm8978_device,
+	&fsi_hdmi_device,
 };
 
 static void __init eva_clock_init(void)
diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c
index 7ea2b31..c129542 100644
--- a/arch/arm/mach-shmobile/board-mackerel.c
+++ b/arch/arm/mach-shmobile/board-mackerel.c
@@ -695,6 +695,7 @@
  *  - J30 "open"
  *  - modify usbhs1_get_id() USBHS_HOST -> USBHS_GADGET
  *  - add .get_vbus = usbhs_get_vbus in usbhs1_private
+ *  - check usbhs0_device(pio)/usbhs1_device(irq) order in mackerel_devices.
  */
 #define IRQ8 evt2irq(0x0300)
 #define USB_PHY_MODE		(1 << 4)
@@ -1325,8 +1326,8 @@
 	&nor_flash_device,
 	&smc911x_device,
 	&lcdc_device,
-	&usbhs1_device,
 	&usbhs0_device,
+	&usbhs1_device,
 	&leds_device,
 	&fsi_device,
 	&fsi_ak4643_device,
diff --git a/arch/arm/mach-shmobile/board-marzen.c b/arch/arm/mach-shmobile/board-marzen.c
index 3a528cf..fcf5a47 100644
--- a/arch/arm/mach-shmobile/board-marzen.c
+++ b/arch/arm/mach-shmobile/board-marzen.c
@@ -67,7 +67,7 @@
 
 static struct platform_device eth_device = {
 	.name		= "smsc911x",
-	.id		= 0,
+	.id		= -1,
 	.dev  = {
 		.platform_data = &smsc911x_platdata,
 	},
diff --git a/arch/arm/mach-shmobile/intc-sh73a0.c b/arch/arm/mach-shmobile/intc-sh73a0.c
index ee44740..588555a 100644
--- a/arch/arm/mach-shmobile/intc-sh73a0.c
+++ b/arch/arm/mach-shmobile/intc-sh73a0.c
@@ -259,9 +259,9 @@
 	return 0; /* always allow wakeup */
 }
 
-#define RELOC_BASE 0x1000
+#define RELOC_BASE 0x1200
 
-/* INTCA IRQ pins at INTCS + 0x1000 to make space for GIC+INTC handling */
+/* INTCA IRQ pins at INTCS + RELOC_BASE to make space for GIC+INTC handling */
 #define INTCS_VECT_RELOC(n, vect) INTCS_VECT((n), (vect) + RELOC_BASE)
 
 INTC_IRQ_PINS_32(intca_irq_pins, 0xe6900000,
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 4e7d118..051204f 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -267,17 +267,19 @@
 	vunmap(cpu_addr);
 }
 
+#define DEFAULT_DMA_COHERENT_POOL_SIZE	SZ_256K
+
 struct dma_pool {
 	size_t size;
 	spinlock_t lock;
 	unsigned long *bitmap;
 	unsigned long nr_pages;
 	void *vaddr;
-	struct page *page;
+	struct page **pages;
 };
 
 static struct dma_pool atomic_pool = {
-	.size = SZ_256K,
+	.size = DEFAULT_DMA_COHERENT_POOL_SIZE,
 };
 
 static int __init early_coherent_pool(char *p)
@@ -287,6 +289,21 @@
 }
 early_param("coherent_pool", early_coherent_pool);
 
+void __init init_dma_coherent_pool_size(unsigned long size)
+{
+	/*
+	 * Catch any attempt to set the pool size too late.
+	 */
+	BUG_ON(atomic_pool.vaddr);
+
+	/*
+	 * Set architecture specific coherent pool size only if
+	 * it has not been changed by kernel command line parameter.
+	 */
+	if (atomic_pool.size == DEFAULT_DMA_COHERENT_POOL_SIZE)
+		atomic_pool.size = size;
+}
+
 /*
  * Initialise the coherent pool for atomic allocations.
  */
@@ -297,6 +314,7 @@
 	unsigned long nr_pages = pool->size >> PAGE_SHIFT;
 	unsigned long *bitmap;
 	struct page *page;
+	struct page **pages;
 	void *ptr;
 	int bitmap_size = BITS_TO_LONGS(nr_pages) * sizeof(long);
 
@@ -304,21 +322,31 @@
 	if (!bitmap)
 		goto no_bitmap;
 
+	pages = kzalloc(nr_pages * sizeof(struct page *), GFP_KERNEL);
+	if (!pages)
+		goto no_pages;
+
 	if (IS_ENABLED(CONFIG_CMA))
 		ptr = __alloc_from_contiguous(NULL, pool->size, prot, &page);
 	else
 		ptr = __alloc_remap_buffer(NULL, pool->size, GFP_KERNEL, prot,
 					   &page, NULL);
 	if (ptr) {
+		int i;
+
+		for (i = 0; i < nr_pages; i++)
+			pages[i] = page + i;
+
 		spin_lock_init(&pool->lock);
 		pool->vaddr = ptr;
-		pool->page = page;
+		pool->pages = pages;
 		pool->bitmap = bitmap;
 		pool->nr_pages = nr_pages;
 		pr_info("DMA: preallocated %u KiB pool for atomic coherent allocations\n",
 		       (unsigned)pool->size / 1024);
 		return 0;
 	}
+no_pages:
 	kfree(bitmap);
 no_bitmap:
 	pr_err("DMA: failed to allocate %u KiB pool for atomic coherent allocation\n",
@@ -443,27 +471,45 @@
 	if (pageno < pool->nr_pages) {
 		bitmap_set(pool->bitmap, pageno, count);
 		ptr = pool->vaddr + PAGE_SIZE * pageno;
-		*ret_page = pool->page + pageno;
+		*ret_page = pool->pages[pageno];
+	} else {
+		pr_err_once("ERROR: %u KiB atomic DMA coherent pool is too small!\n"
+			    "Please increase it with coherent_pool= kernel parameter!\n",
+			    (unsigned)pool->size / 1024);
 	}
 	spin_unlock_irqrestore(&pool->lock, flags);
 
 	return ptr;
 }
 
+static bool __in_atomic_pool(void *start, size_t size)
+{
+	struct dma_pool *pool = &atomic_pool;
+	void *end = start + size;
+	void *pool_start = pool->vaddr;
+	void *pool_end = pool->vaddr + pool->size;
+
+	if (start < pool_start || start > pool_end)
+		return false;
+
+	if (end <= pool_end)
+		return true;
+
+	WARN(1, "Wrong coherent size(%p-%p) from atomic pool(%p-%p)\n",
+	     start, end - 1, pool_start, pool_end - 1);
+
+	return false;
+}
+
 static int __free_from_pool(void *start, size_t size)
 {
 	struct dma_pool *pool = &atomic_pool;
 	unsigned long pageno, count;
 	unsigned long flags;
 
-	if (start < pool->vaddr || start > pool->vaddr + pool->size)
+	if (!__in_atomic_pool(start, size))
 		return 0;
 
-	if (start + size > pool->vaddr + pool->size) {
-		WARN(1, "freeing wrong coherent size from pool\n");
-		return 0;
-	}
-
 	pageno = (start - pool->vaddr) >> PAGE_SHIFT;
 	count = size >> PAGE_SHIFT;
 
@@ -1090,10 +1136,22 @@
 	return 0;
 }
 
+static struct page **__atomic_get_pages(void *addr)
+{
+	struct dma_pool *pool = &atomic_pool;
+	struct page **pages = pool->pages;
+	int offs = (addr - pool->vaddr) >> PAGE_SHIFT;
+
+	return pages + offs;
+}
+
 static struct page **__iommu_get_pages(void *cpu_addr, struct dma_attrs *attrs)
 {
 	struct vm_struct *area;
 
+	if (__in_atomic_pool(cpu_addr, PAGE_SIZE))
+		return __atomic_get_pages(cpu_addr);
+
 	if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs))
 		return cpu_addr;
 
@@ -1103,6 +1161,34 @@
 	return NULL;
 }
 
+static void *__iommu_alloc_atomic(struct device *dev, size_t size,
+				  dma_addr_t *handle)
+{
+	struct page *page;
+	void *addr;
+
+	addr = __alloc_from_pool(size, &page);
+	if (!addr)
+		return NULL;
+
+	*handle = __iommu_create_mapping(dev, &page, size);
+	if (*handle == DMA_ERROR_CODE)
+		goto err_mapping;
+
+	return addr;
+
+err_mapping:
+	__free_from_pool(addr, size);
+	return NULL;
+}
+
+static void __iommu_free_atomic(struct device *dev, struct page **pages,
+				dma_addr_t handle, size_t size)
+{
+	__iommu_remove_mapping(dev, handle, size);
+	__free_from_pool(page_address(pages[0]), size);
+}
+
 static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
 	    dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
 {
@@ -1113,6 +1199,9 @@
 	*handle = DMA_ERROR_CODE;
 	size = PAGE_ALIGN(size);
 
+	if (gfp & GFP_ATOMIC)
+		return __iommu_alloc_atomic(dev, size, handle);
+
 	pages = __iommu_alloc_buffer(dev, size, gfp);
 	if (!pages)
 		return NULL;
@@ -1179,6 +1268,11 @@
 		return;
 	}
 
+	if (__in_atomic_pool(cpu_addr, size)) {
+		__iommu_free_atomic(dev, pages, handle, size);
+		return;
+	}
+
 	if (!dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) {
 		unmap_kernel_range((unsigned long)cpu_addr, size);
 		vunmap(cpu_addr);
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index 6c6defc..af9cf30 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -141,7 +141,7 @@
 
 #define atomic_sub_and_test(i,v)	(atomic_sub_return((i),(v)) == 0)
 
-#define ATOMIC_INIT(i)	((atomic_t) { (i) })
+#define ATOMIC_INIT(i)	{ (i) }
 
 #define smp_mb__before_atomic_dec()	smp_mb()
 #define smp_mb__after_atomic_dec()	smp_mb()
@@ -150,7 +150,7 @@
 
 #ifdef CONFIG_64BIT
 
-#define ATOMIC64_INIT(i) ((atomic64_t) { (i) })
+#define ATOMIC64_INIT(i) { (i) }
 
 static __inline__ s64
 __atomic64_add_return(s64 i, atomic64_t *v)
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index d4b94b3..2c05a929 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -309,7 +309,7 @@
 		cregs->ksp = (unsigned long)stack
 			+ (pregs->gr[21] & (THREAD_SIZE - 1));
 		cregs->gr[30] = usp;
-		if (p->personality == PER_HPUX) {
+		if (personality(p->personality) == PER_HPUX) {
 #ifdef CONFIG_HPUX
 			cregs->kpc = (unsigned long) &hpux_child_return;
 #else
diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c
index c9b9322..7426e40 100644
--- a/arch/parisc/kernel/sys_parisc.c
+++ b/arch/parisc/kernel/sys_parisc.c
@@ -225,12 +225,12 @@
 	long err;
 
 	if (personality(current->personality) == PER_LINUX32
-	    && personality == PER_LINUX)
-		personality = PER_LINUX32;
+	    && personality(personality) == PER_LINUX)
+		personality = (personality & ~PER_MASK) | PER_LINUX32;
 
 	err = sys_personality(personality);
-	if (err == PER_LINUX32)
-		err = PER_LINUX;
+	if (personality(err) == PER_LINUX32)
+		err = (err & ~PER_MASK) | PER_LINUX;
 
 	return err;
 }
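
The fix above compares only the personality base while preserving the flag bits stored above PER_MASK. A small user-space illustration (not parisc-specific) of why the unmasked comparison broke when a flag such as ADDR_NO_RANDOMIZE was set:

#include <stdio.h>
#include <sys/personality.h>

int main(void)
{
	unsigned long pers = PER_LINUX | ADDR_NO_RANDOMIZE;

	/* A raw equality test fails because of the flag bit... */
	printf("raw  == PER_LINUX: %d\n", pers == PER_LINUX);
	/* ...while masking with PER_MASK, as the patch does, succeeds. */
	printf("base == PER_LINUX: %d\n", (pers & PER_MASK) == PER_LINUX);
	return 0;
}
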
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 53b6dfa..54b73a2 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -386,6 +386,7 @@
 enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF};
 
 extern int powersave_nap;	/* set if nap mode can be used in idle loop */
+extern void power7_nap(void);
 
 #ifdef CONFIG_PSERIES_IDLE
 extern void update_smt_snooze_delay(int snooze);
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 85b05c4..e899572 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -76,6 +76,7 @@
 	DEFINE(SIGSEGV, SIGSEGV);
 	DEFINE(NMI_MASK, NMI_MASK);
 	DEFINE(THREAD_DSCR, offsetof(struct thread_struct, dscr));
+	DEFINE(THREAD_DSCR_INHERIT, offsetof(struct thread_struct, dscr_inherit));
 #else
 	DEFINE(THREAD_INFO, offsetof(struct task_struct, stack));
 #endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index 5b25c80..a892680 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -28,6 +28,8 @@
 
 void doorbell_cause_ipi(int cpu, unsigned long data)
 {
+	/* Order previous accesses vs. msgsnd, which is treated as a store */
+	mb();
 	ppc_msgsnd(PPC_DBELL, 0, data);
 }
 
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 4b01a25..b40e0b4 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -370,6 +370,12 @@
 	li	r3,0
 	b	syscall_exit
 
+	.section	".toc","aw"
+DSCR_DEFAULT:
+	.tc dscr_default[TC],dscr_default
+
+	.section	".text"
+
 /*
  * This routine switches between two different tasks.  The process
  * state of one is saved on its kernel stack.  Then the state
@@ -509,9 +515,6 @@
 	mr	r1,r8		/* start using new stack pointer */
 	std	r7,PACAKSAVE(r13)
 
-	ld	r6,_CCR(r1)
-	mtcrf	0xFF,r6
-
 #ifdef CONFIG_ALTIVEC
 BEGIN_FTR_SECTION
 	ld	r0,THREAD_VRSAVE(r4)
@@ -520,14 +523,22 @@
 #endif /* CONFIG_ALTIVEC */
 #ifdef CONFIG_PPC64
 BEGIN_FTR_SECTION
+	lwz	r6,THREAD_DSCR_INHERIT(r4)
+	ld	r7,DSCR_DEFAULT@toc(2)
 	ld	r0,THREAD_DSCR(r4)
-	cmpd	r0,r25
-	beq	1f
+	cmpwi	r6,0
+	bne	1f
+	ld	r0,0(r7)
+1:	cmpd	r0,r25
+	beq	2f
 	mtspr	SPRN_DSCR,r0
-1:	
+2:
 END_FTR_SECTION_IFSET(CPU_FTR_DSCR)
 #endif
 
+	ld	r6,_CCR(r1)
+	mtcrf	0xFF,r6
+
 	/* r3-r13 are destroyed -- Cort */
 	REST_8GPRS(14, r1)
 	REST_10GPRS(22, r1)
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index e894515..39aa97d 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -186,7 +186,7 @@
 	KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x800)
 
 	MASKABLE_EXCEPTION_PSERIES(0x900, 0x900, decrementer)
-	MASKABLE_EXCEPTION_HV(0x980, 0x982, decrementer)
+	STD_EXCEPTION_HV(0x980, 0x982, hdecrementer)
 
 	STD_EXCEPTION_PSERIES(0xa00, 0xa00, trap_0a)
 	KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xa00)
@@ -486,6 +486,7 @@
 
 	STD_EXCEPTION_COMMON_ASYNC(0x500, hardware_interrupt, do_IRQ)
 	STD_EXCEPTION_COMMON_ASYNC(0x900, decrementer, .timer_interrupt)
+	STD_EXCEPTION_COMMON(0x980, hdecrementer, .hdec_interrupt)
 	STD_EXCEPTION_COMMON(0xa00, trap_0a, .unknown_exception)
 	STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception)
 	STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception)
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
index 7140d83..e11863f 100644
--- a/arch/powerpc/kernel/idle_power7.S
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -28,7 +28,9 @@
 	lwz	r4,ADDROFF(powersave_nap)(r3)
 	cmpwi	0,r4,0
 	beqlr
+	/* fall through */
 
+_GLOBAL(power7_nap)
 	/* NAP is a state loss, we create a regs frame on the
 	 * stack, fill it up with the state we care about and
 	 * stick a pointer to it in PACAR1. We really only
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 710f400..1a1f2dd 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -802,16 +802,8 @@
 #endif /* CONFIG_PPC_STD_MMU_64 */
 #ifdef CONFIG_PPC64 
 	if (cpu_has_feature(CPU_FTR_DSCR)) {
-		if (current->thread.dscr_inherit) {
-			p->thread.dscr_inherit = 1;
-			p->thread.dscr = current->thread.dscr;
-		} else if (0 != dscr_default) {
-			p->thread.dscr_inherit = 1;
-			p->thread.dscr = dscr_default;
-		} else {
-			p->thread.dscr_inherit = 0;
-			p->thread.dscr = 0;
-		}
+		p->thread.dscr_inherit = current->thread.dscr_inherit;
+		p->thread.dscr = current->thread.dscr;
 	}
 #endif
 
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 0321007..8d4214a 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -198,8 +198,15 @@
 	struct cpu_messages *info = &per_cpu(ipi_message, cpu);
 	char *message = (char *)&info->messages;
 
+	/*
+	 * Order previous accesses before accesses in the IPI handler.
+	 */
+	smp_mb();
 	message[msg] = 1;
-	mb();
+	/*
+	 * cause_ipi functions are required to include a full barrier
+	 * before doing whatever causes the IPI.
+	 */
 	smp_ops->cause_ipi(cpu, info->data);
 }
 
@@ -211,7 +218,7 @@
 	mb();	/* order any irq clear */
 
 	do {
-		all = xchg_local(&info->messages, 0);
+		all = xchg(&info->messages, 0);
 
 #ifdef __BIG_ENDIAN
 		if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNCTION)))
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 3529446..8302af6 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -194,6 +194,14 @@
 	return sprintf(buf, "%lx\n", dscr_default);
 }
 
+static void update_dscr(void *dummy)
+{
+	if (!current->thread.dscr_inherit) {
+		current->thread.dscr = dscr_default;
+		mtspr(SPRN_DSCR, dscr_default);
+	}
+}
+
 static ssize_t __used store_dscr_default(struct device *dev,
 		struct device_attribute *attr, const char *buf,
 		size_t count)
@@ -206,6 +214,8 @@
 		return -EINVAL;
 	dscr_default = val;
 
+	on_each_cpu(update_dscr, NULL, 1);
+
 	return count;
 }
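
With update_dscr() run on every CPU, a new default DSCR written through sysfs takes effect immediately on threads that have not set their own value. For illustration only, assuming the attribute is exposed as /sys/devices/system/cpu/dscr_default and is written in hex to match show_dscr_default()'s %lx format:

#include <stdio.h>

int main(void)
{
	/* Path and value are assumptions used purely for illustration. */
	FILE *f = fopen("/sys/devices/system/cpu/dscr_default", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fprintf(f, "%lx\n", 0x10UL);	/* new system-wide default DSCR */
	fclose(f);
	return 0;
}
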
 
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index be171ee..e49e931 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -535,6 +535,15 @@
 	trace_timer_interrupt_exit(regs);
 }
 
+/*
+ * Hypervisor decrementer interrupts shouldn't occur but are sometimes
+ * left pending on exit from a KVM guest.  We don't need to do anything
+ * to clear them, as they are edge-triggered.
+ */
+void hdec_interrupt(struct pt_regs *regs)
+{
+}
+
 #ifdef CONFIG_SUSPEND
 static void generic_suspend_disable_irqs(void)
 {
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 1589723..ae0843f 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -972,8 +972,9 @@
 			cpu_has_feature(CPU_FTR_DSCR)) {
 		PPC_WARN_EMULATED(mtdscr, regs);
 		rd = (instword >> 21) & 0x1f;
-		mtspr(SPRN_DSCR, regs->gpr[rd]);
+		current->thread.dscr = regs->gpr[rd];
 		current->thread.dscr_inherit = 1;
+		mtspr(SPRN_DSCR, current->thread.dscr);
 		return 0;
 	}
 #endif
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index dd223b3..17e5b23 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -20,7 +20,7 @@
 {
 	int err;
 
-	err = __put_user(instr, addr);
+	__put_user_size(instr, addr, 4, err);
 	if (err)
 		return err;
 	asm ("dcbst 0, %0; sync; icbi 0,%0; sync; isync" : : "r" (addr));
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 39b1597..59213cf 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1436,11 +1436,11 @@
 
 /*
  * Update the node maps and sysfs entries for each cpu whose home node
- * has changed.
+ * has changed. Returns 1 when the topology has changed, and 0 otherwise.
  */
 int arch_update_cpu_topology(void)
 {
-	int cpu, nid, old_nid;
+	int cpu, nid, old_nid, changed = 0;
 	unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0};
 	struct device *dev;
 
@@ -1466,9 +1466,10 @@
 		dev = get_cpu_device(cpu);
 		if (dev)
 			kobject_uevent(&dev->kobj, KOBJ_CHANGE);
+		changed = 1;
 	}
 
-	return 1;
+	return changed;
 }
 
 static void topology_work_fn(struct work_struct *work)
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 3ef4625..7698b6e 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -106,14 +106,6 @@
 {
 	unsigned int cpu;
 
-	/* If powersave_nap is enabled, use NAP mode, else just
-	 * spin aimlessly
-	 */
-	if (!powersave_nap) {
-		generic_mach_cpu_die();
-		return;
-	}
-
 	/* Standard hot unplug procedure */
 	local_irq_disable();
 	idle_task_exit();
@@ -128,7 +120,7 @@
 	 */
 	mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1);
 	while (!generic_check_cpu_restart(cpu)) {
-		power7_idle();
+		power7_nap();
 		if (!generic_check_cpu_restart(cpu)) {
 			DBG("CPU%d Unexpected exit while offline !\n", cpu);
 			/* We may be getting an IPI, so we re-enable
diff --git a/arch/powerpc/sysdev/xics/icp-hv.c b/arch/powerpc/sysdev/xics/icp-hv.c
index 14469cf..df0fc58 100644
--- a/arch/powerpc/sysdev/xics/icp-hv.c
+++ b/arch/powerpc/sysdev/xics/icp-hv.c
@@ -65,7 +65,11 @@
 static inline void icp_hv_set_qirr(int n_cpu , u8 value)
 {
 	int hw_cpu = get_hard_smp_processor_id(n_cpu);
-	long rc = plpar_hcall_norets(H_IPI, hw_cpu, value);
+	long rc;
+
+	/* Make sure all previous accesses are ordered before IPI sending */
+	mb();
+	rc = plpar_hcall_norets(H_IPI, hw_cpu, value);
 	if (rc != H_SUCCESS) {
 		pr_err("%s: bad return code qirr cpu=%d hw_cpu=%d mfrr=0x%x "
 			"returned %ld\n", __func__, n_cpu, hw_cpu, value, rc);
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index 32e8449..9b94a16 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -180,7 +180,8 @@
 #define ELF_PLATFORM (elf_platform)
 
 #ifndef CONFIG_64BIT
-#define SET_PERSONALITY(ex) set_personality(PER_LINUX)
+#define SET_PERSONALITY(ex) \
+	set_personality(PER_LINUX | (current->personality & (~PER_MASK)))
 #else /* CONFIG_64BIT */
 #define SET_PERSONALITY(ex)					\
 do {								\
diff --git a/arch/s390/include/asm/posix_types.h b/arch/s390/include/asm/posix_types.h
index 7bcc14e..bf2a2ad 100644
--- a/arch/s390/include/asm/posix_types.h
+++ b/arch/s390/include/asm/posix_types.h
@@ -13,6 +13,7 @@
  */
 
 typedef unsigned long   __kernel_size_t;
+typedef long            __kernel_ssize_t;
 #define __kernel_size_t __kernel_size_t
 
 typedef unsigned short	__kernel_old_dev_t;
@@ -25,7 +26,6 @@
 typedef unsigned short  __kernel_ipc_pid_t;
 typedef unsigned short  __kernel_uid_t;
 typedef unsigned short  __kernel_gid_t;
-typedef int             __kernel_ssize_t;
 typedef int             __kernel_ptrdiff_t;
 
 #else /* __s390x__ */
@@ -35,7 +35,6 @@
 typedef int             __kernel_ipc_pid_t;
 typedef unsigned int    __kernel_uid_t;
 typedef unsigned int    __kernel_gid_t;
-typedef long            __kernel_ssize_t;
 typedef long            __kernel_ptrdiff_t;
 typedef unsigned long   __kernel_sigset_t;      /* at least 32 bits */
 
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index a0a8340..ce26ac3 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -44,6 +44,7 @@
 }
 
 static inline int smp_find_processor_id(int address) { return 0; }
+static inline int smp_store_status(int cpu) { return 0; }
 static inline int smp_vcpu_scheduled(int cpu) { return 1; }
 static inline void smp_yield_cpu(int cpu) { }
 static inline void smp_yield(void) { }
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index a1e9d69..584b936 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -169,7 +169,7 @@
 	if (*offset)
 		return -EINVAL;
 	retval = oprofilefs_ulong_from_user(&val, buf, count);
-	if (retval)
+	if (retval <= 0)
 		return retval;
 	if (val < oprofile_min_interval)
 		oprofile_hw_interval = oprofile_min_interval;
@@ -212,7 +212,7 @@
 		return -EINVAL;
 
 	retval = oprofilefs_ulong_from_user(&val, buf, count);
-	if (retval)
+	if (retval <= 0)
 		return retval;
 	if (val != 0)
 		return -EINVAL;
@@ -243,7 +243,7 @@
 		return -EINVAL;
 
 	retval = oprofilefs_ulong_from_user(&val, buf, count);
-	if (retval)
+	if (retval <= 0)
 		return retval;
 
 	if (val != 0 && val != 1)
@@ -278,7 +278,7 @@
 		return -EINVAL;
 
 	retval = oprofilefs_ulong_from_user(&val, buf, count);
-	if (retval)
+	if (retval <= 0)
 		return retval;
 
 	if (val != 0 && val != 1)
@@ -317,7 +317,7 @@
 		return -EINVAL;
 
 	retval = oprofilefs_ulong_from_user(&val, buf, count);
-	if (retval)
+	if (retval <= 0)
 		return retval;
 
 	if (val != 0 && val != 1)
diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c
index f602385..0748fe0 100644
--- a/arch/um/os-Linux/time.c
+++ b/arch/um/os-Linux/time.c
@@ -114,7 +114,7 @@
 	skew += this_tick - last_tick;
 
 	while (skew >= one_tick) {
-		alarm_handler(SIGVTALRM, NULL);
+		alarm_handler(SIGVTALRM, NULL, NULL);
 		skew -= one_tick;
 	}
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 0a55710..62ec3e6 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -2341,6 +2341,27 @@
 	return ret;
 }
 
+static ssize_t uncore_get_attr_cpumask(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &uncore_cpu_mask);
+
+	buf[n++] = '\n';
+	buf[n] = '\0';
+	return n;
+}
+
+static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
+
+static struct attribute *uncore_pmu_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static struct attribute_group uncore_pmu_attr_group = {
+	.attrs = uncore_pmu_attrs,
+};
+
 static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu)
 {
 	int ret;
@@ -2378,8 +2399,8 @@
 		free_percpu(type->pmus[i].box);
 	kfree(type->pmus);
 	type->pmus = NULL;
-	kfree(type->attr_groups[1]);
-	type->attr_groups[1] = NULL;
+	kfree(type->events_group);
+	type->events_group = NULL;
 }
 
 static void __init uncore_types_exit(struct intel_uncore_type **types)
@@ -2431,9 +2452,10 @@
 		for (j = 0; j < i; j++)
 			attrs[j] = &type->event_descs[j].attr.attr;
 
-		type->attr_groups[1] = events_group;
+		type->events_group = events_group;
 	}
 
+	type->pmu_group = &uncore_pmu_attr_group;
 	type->pmus = pmus;
 	return 0;
 fail:
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 5b81c18..e68a455 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -369,10 +369,12 @@
 	struct intel_uncore_pmu *pmus;
 	struct intel_uncore_ops *ops;
 	struct uncore_event_desc *event_descs;
-	const struct attribute_group *attr_groups[3];
+	const struct attribute_group *attr_groups[4];
 };
 
-#define format_group attr_groups[0]
+#define pmu_group attr_groups[0]
+#define format_group attr_groups[1]
+#define events_group attr_groups[2]
 
 struct intel_uncore_ops {
 	void (*init_box)(struct intel_uncore_box *);
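
The new pmu_group exposes a cpumask attribute next to each uncore PMU's format and events groups, telling tools which CPU should host the per-package events. A small sketch; the PMU name uncore_imc is only an example and the location assumes the standard event_source sysfs bus path:

#include <stdio.h>

int main(void)
{
	/* PMU name is an example; list /sys/bus/event_source/devices/ for others. */
	FILE *f = fopen("/sys/bus/event_source/devices/uncore_imc/cpumask", "r");
	char buf[256];

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("open uncore events on CPUs: %s", buf);
	fclose(f);
	return 0;
}
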
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 061ac17..f438a44 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1148,7 +1148,6 @@
 	 * ip location, and move flags into the return ip location.
 	 */
 	pushl 4(%esp)	/* save return ip into ip slot */
-	subl $MCOUNT_INSN_SIZE, (%esp)	/* Adjust ip */
 
 	pushl $0	/* Load 0 into orig_ax */
 	pushl %gs
@@ -1169,6 +1168,7 @@
 	movl $__KERNEL_CS,13*4(%esp)
 
 	movl 12*4(%esp), %eax	/* Load ip (1st parameter) */
+	subl $MCOUNT_INSN_SIZE, %eax	/* Adjust ip */
 	movl 0x4(%ebp), %edx	/* Load parent ip (2nd parameter) */
 	leal function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */
 	pushl %esp		/* Save pt_regs as 4th parameter */
@@ -1180,7 +1180,6 @@
 	movl 14*4(%esp), %eax	/* Move flags back into cs */
 	movl %eax, 13*4(%esp)	/* Needed to keep addl from modifying flags */
 	movl 12*4(%esp), %eax	/* Get return ip from regs->ip */
-	addl $MCOUNT_INSN_SIZE, %eax
 	movl %eax, 14*4(%esp)	/* Put return ip back for ret */
 
 	popl %ebx
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index ed767b7..e9cc2b3 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -165,6 +165,10 @@
 	movq EFLAGS(%rsp), %rax
 	movq %rax, SS(%rsp)
 
+	/* Handlers can change the RIP */
+	movq RIP(%rsp), %rax
+	movq %rax, SS+8(%rsp)
+
 	/* restore the rest of pt_regs */
 	movq R15(%rsp), %r15
 	movq R14(%rsp), %r14
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 47ae102..b7c2a85 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -541,6 +541,8 @@
 	return 1;
 }
 
+static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+				      struct kprobe_ctlblk *kcb);
 /*
  * Interrupts are disabled on entry as trap3 is an interrupt gate and they
  * remain disabled throughout this function.
@@ -599,6 +601,12 @@
 	} else if (kprobe_running()) {
 		p = __this_cpu_read(current_kprobe);
 		if (p->break_handler && p->break_handler(p, regs)) {
+#ifdef KPROBES_CAN_USE_FTRACE
+			if (kprobe_ftrace(p)) {
+				skip_singlestep(p, regs, kcb);
+				return 1;
+			}
+#endif
 			setup_singlestep(p, regs, kcb, 0);
 			return 1;
 		}
@@ -1053,6 +1061,21 @@
 }
 
 #ifdef KPROBES_CAN_USE_FTRACE
+static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+				      struct kprobe_ctlblk *kcb)
+{
+	/*
+	 * Emulate singlestep (and also recover regs->ip)
+	 * as if there were a 5-byte nop
+	 */
+	regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
+	if (unlikely(p->post_handler)) {
+		kcb->kprobe_status = KPROBE_HIT_SSDONE;
+		p->post_handler(p, regs, 0);
+	}
+	__this_cpu_write(current_kprobe, NULL);
+}
+
 /* Ftrace callback handler for kprobes */
 void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
 				     struct ftrace_ops *ops, struct pt_regs *regs)
@@ -1072,21 +1095,17 @@
 	if (kprobe_running()) {
 		kprobes_inc_nmissed_count(p);
 	} else {
-		regs->ip += sizeof(kprobe_opcode_t);
+		/* Kprobe handlers expect regs->ip = ip + 1, as if a breakpoint hit */
+		regs->ip = ip + sizeof(kprobe_opcode_t);
 
 		__this_cpu_write(current_kprobe, p);
 		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
-		if (p->pre_handler)
-			p->pre_handler(p, regs);
-
-		if (unlikely(p->post_handler)) {
-			/* Emulate singlestep as if there is a 5byte nop */
-			regs->ip = ip + MCOUNT_INSN_SIZE;
-			kcb->kprobe_status = KPROBE_HIT_SSDONE;
-			p->post_handler(p, regs, 0);
-		}
-		__this_cpu_write(current_kprobe, NULL);
-		regs->ip = ip;	/* Recover for next callback */
+		if (!p->pre_handler || !p->pre_handler(p, regs))
+			skip_singlestep(p, regs, kcb);
+		/*
+		 * If pre_handler returns !0, it sets regs->ip and
+		 * resets current kprobe.
+		 */
 	}
 end:
 	local_irq_restore(flags);
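
With skip_singlestep() factored out, a kprobe on a ftrace-managed function entry now runs its pre and post handlers directly from kprobe_ftrace_handler(), without a breakpoint or single-step. A minimal module sketch in the style of samples/kprobes (the probed symbol is only an example):

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kprobes.h>

/* Probed symbol chosen only as an example of a ftrace-able function entry. */
static struct kprobe kp = {
	.symbol_name = "do_fork",
};

static int handler_pre(struct kprobe *p, struct pt_regs *regs)
{
	pr_info("pre:  ip=%lx\n", regs->ip);
	return 0;	/* 0 lets skip_singlestep()/setup_singlestep() run */
}

static void handler_post(struct kprobe *p, struct pt_regs *regs,
			 unsigned long flags)
{
	pr_info("post: ip=%lx\n", regs->ip);
}

static int __init kprobe_test_init(void)
{
	kp.pre_handler = handler_pre;
	kp.post_handler = handler_post;
	return register_kprobe(&kp);
}

static void __exit kprobe_test_exit(void)
{
	unregister_kprobe(&kp);
}

module_init(kprobe_test_init);
module_exit(kprobe_test_exit);
MODULE_LICENSE("GPL");
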
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index dce75b76..148ed66 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2000,6 +2000,9 @@
 	case MSR_KVM_STEAL_TIME:
 		data = vcpu->arch.st.msr_val;
 		break;
+	case MSR_KVM_PV_EOI_EN:
+		data = vcpu->arch.pv_eoi.msr_val;
+		break;
 	case MSR_IA32_P5_MC_ADDR:
 	case MSR_IA32_P5_MC_TYPE:
 	case MSR_IA32_MCG_CAP:
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index b65a761..5141d80 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1283,7 +1283,7 @@
 	cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask));
 
 	args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
-	if (start != TLB_FLUSH_ALL && (end - start) <= PAGE_SIZE) {
+	if (end != TLB_FLUSH_ALL && (end - start) <= PAGE_SIZE) {
 		args->op.cmd = MMUEXT_INVLPG_MULTI;
 		args->op.arg1.linear_addr = start;
 	}
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index d4b2554..76ba0e9 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -599,7 +599,7 @@
 	if (p2m_index(set_pfn))
 		return false;
 
-	for (pfn = 0; pfn <= MAX_DOMAIN_PAGES; pfn += P2M_PER_PAGE) {
+	for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_PER_PAGE) {
 		topidx = p2m_top_index(pfn);
 
 		if (!p2m_top[topidx])
diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c
index 78efb03..34d94c7 100644
--- a/drivers/base/dma-contiguous.c
+++ b/drivers/base/dma-contiguous.c
@@ -250,7 +250,7 @@
 		return -EINVAL;
 
 	/* Sanitise input arguments */
-	alignment = PAGE_SIZE << max(MAX_ORDER, pageblock_order);
+	alignment = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order);
 	base = ALIGN(base, alignment);
 	size = ALIGN(size, alignment);
 	limit &= ~(alignment - 1);
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index b16c8a7..ba7926f5 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -294,7 +294,7 @@
 
 config GPIO_MC9S08DZ60
 	bool "MX35 3DS BOARD MC9S08DZ60 GPIO functions"
-	depends on I2C && MACH_MX35_3DS
+	depends on I2C=y && MACH_MX35_3DS
 	help
 	  Select this to enable the MC9S08DZ60 GPIO driver
 
diff --git a/drivers/gpio/gpio-em.c b/drivers/gpio/gpio-em.c
index ae37181..ec48ed5 100644
--- a/drivers/gpio/gpio-em.c
+++ b/drivers/gpio/gpio-em.c
@@ -247,9 +247,9 @@
 
 	p->irq_base = irq_alloc_descs(pdata->irq_base, 0,
 				      pdata->number_of_pins, numa_node_id());
-	if (IS_ERR_VALUE(p->irq_base)) {
+	if (p->irq_base < 0) {
 		dev_err(&pdev->dev, "cannot get irq_desc\n");
-		return -ENXIO;
+		return p->irq_base;
 	}
 	pr_debug("gio: hw base = %d, nr = %d, sw base = %d\n",
 		 pdata->gpio_base, pdata->number_of_pins, p->irq_base);
diff --git a/drivers/gpio/gpio-rdc321x.c b/drivers/gpio/gpio-rdc321x.c
index e97016a..b62d443 100644
--- a/drivers/gpio/gpio-rdc321x.c
+++ b/drivers/gpio/gpio-rdc321x.c
@@ -170,6 +170,7 @@
 	rdc321x_gpio_dev->reg2_data_base = r->start + 0x4;
 
 	rdc321x_gpio_dev->chip.label = "rdc321x-gpio";
+	rdc321x_gpio_dev->chip.owner = THIS_MODULE;
 	rdc321x_gpio_dev->chip.direction_input = rdc_gpio_direction_input;
 	rdc321x_gpio_dev->chip.direction_output = rdc_gpio_config;
 	rdc321x_gpio_dev->chip.get = rdc_gpio_get_value;
diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index a18c4aa..f1a4599 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -82,7 +82,7 @@
 	gpiochip_find(&gg_data, of_gpiochip_find_and_xlate);
 
 	of_node_put(gg_data.gpiospec.np);
-	pr_debug("%s exited with status %d\n", __func__, ret);
+	pr_debug("%s exited with status %d\n", __func__, gg_data.out_gpio);
 	return gg_data.out_gpio;
 }
 EXPORT_SYMBOL(of_get_named_gpio_flags);
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index 08a7aa7..6fbfc24 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -1981,7 +1981,7 @@
 	if (!drm_core_check_feature(dev, DRIVER_MODESET))
 		return -EINVAL;
 
-	if (!req->flags)
+	if (!req->flags || (~DRM_MODE_CURSOR_FLAGS & req->flags))
 		return -EINVAL;
 
 	mutex_lock(&dev->mode_config.mutex);
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index a8743c3..b7ee230 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -87,6 +87,9 @@
 	int product_id;
 	u32 quirks;
 } edid_quirk_list[] = {
+	/* ASUS VW222S */
+	{ "ACI", 0x22a2, EDID_QUIRK_FORCE_REDUCED_BLANKING },
+
 	/* Acer AL1706 */
 	{ "ACR", 44358, EDID_QUIRK_PREFER_LARGE_60 },
 	/* Acer F51 */
diff --git a/drivers/gpu/drm/gma500/psb_intel_display.c b/drivers/gpu/drm/gma500/psb_intel_display.c
index 30dc22a..8033526 100644
--- a/drivers/gpu/drm/gma500/psb_intel_display.c
+++ b/drivers/gpu/drm/gma500/psb_intel_display.c
@@ -1362,6 +1362,9 @@
 	    (struct drm_connector **) (psb_intel_crtc + 1);
 	psb_intel_crtc->mode_set.num_connectors = 0;
 	psb_intel_cursor_init(dev, psb_intel_crtc);
+
+	/* Set to true so that the pipe is forced off on initial config. */
+	psb_intel_crtc->active = true;
 }
 
 int psb_intel_get_pipe_from_crtc_id(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index d9a5372..60815b8 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -72,7 +72,7 @@
 	/* ppgtt PDEs reside in the global gtt pagetable, which has 512*1024
 	 * entries. For aliasing ppgtt support we just steal them at the end for
 	 * now. */
-	first_pd_entry_in_global_pt = 512*1024 - I915_PPGTT_PD_ENTRIES;
+	first_pd_entry_in_global_pt = dev_priv->mm.gtt->gtt_total_entries - I915_PPGTT_PD_ENTRIES;
 
 	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
 	if (!ppgtt)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index a69a3d0..2dfa6cf 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1384,7 +1384,7 @@
 				     enum pipe pipe, int reg)
 {
 	u32 val = I915_READ(reg);
-	WARN(hdmi_pipe_enabled(dev_priv, val, pipe),
+	WARN(hdmi_pipe_enabled(dev_priv, pipe, val),
 	     "PCH HDMI (0x%08x) enabled on transcoder %c, should be disabled\n",
 	     reg, pipe_name(pipe));
 
@@ -1404,13 +1404,13 @@
 
 	reg = PCH_ADPA;
 	val = I915_READ(reg);
-	WARN(adpa_pipe_enabled(dev_priv, val, pipe),
+	WARN(adpa_pipe_enabled(dev_priv, pipe, val),
 	     "PCH VGA enabled on transcoder %c, should be disabled\n",
 	     pipe_name(pipe));
 
 	reg = PCH_LVDS;
 	val = I915_READ(reg);
-	WARN(lvds_pipe_enabled(dev_priv, val, pipe),
+	WARN(lvds_pipe_enabled(dev_priv, pipe, val),
 	     "PCH LVDS enabled on transcoder %c, should be disabled\n",
 	     pipe_name(pipe));
 
@@ -1872,7 +1872,7 @@
 			     enum pipe pipe, int reg)
 {
 	u32 val = I915_READ(reg);
-	if (hdmi_pipe_enabled(dev_priv, val, pipe)) {
+	if (hdmi_pipe_enabled(dev_priv, pipe, val)) {
 		DRM_DEBUG_KMS("Disabling pch HDMI %x on pipe %d\n",
 			      reg, pipe);
 		I915_WRITE(reg, val & ~PORT_ENABLE);
@@ -1894,12 +1894,12 @@
 
 	reg = PCH_ADPA;
 	val = I915_READ(reg);
-	if (adpa_pipe_enabled(dev_priv, val, pipe))
+	if (adpa_pipe_enabled(dev_priv, pipe, val))
 		I915_WRITE(reg, val & ~ADPA_DAC_ENABLE);
 
 	reg = PCH_LVDS;
 	val = I915_READ(reg);
-	if (lvds_pipe_enabled(dev_priv, val, pipe)) {
+	if (lvds_pipe_enabled(dev_priv, pipe, val)) {
 		DRM_DEBUG_KMS("disable lvds on pipe %d val 0x%08x\n", pipe, val);
 		I915_WRITE(reg, val & ~LVDS_PORT_EN);
 		POSTING_READ(reg);
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index e05c0d3..e9a6f6a 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -780,6 +780,14 @@
 			DMI_MATCH(DMI_BOARD_NAME, "ZBOXSD-ID12/ID13"),
 		},
 	},
+	{
+		.callback = intel_no_lvds_dmi_callback,
+		.ident = "Gigabyte GA-D525TUD",
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "Gigabyte Technology Co., Ltd."),
+			DMI_MATCH(DMI_BOARD_NAME, "D525TUD"),
+		},
+	},
 
 	{ }	/* terminating entry */
 };
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index cc8df4d..7644f31 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -60,11 +60,11 @@
 
 	switch (fb->pixel_format) {
 	case DRM_FORMAT_XBGR8888:
-		sprctl |= SPRITE_FORMAT_RGBX888;
+		sprctl |= SPRITE_FORMAT_RGBX888 | SPRITE_RGB_ORDER_RGBX;
 		pixel_size = 4;
 		break;
 	case DRM_FORMAT_XRGB8888:
-		sprctl |= SPRITE_FORMAT_RGBX888 | SPRITE_RGB_ORDER_RGBX;
+		sprctl |= SPRITE_FORMAT_RGBX888;
 		pixel_size = 4;
 		break;
 	case DRM_FORMAT_YUYV:
diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c
index 1866dbb..c610144 100644
--- a/drivers/gpu/drm/nouveau/nouveau_state.c
+++ b/drivers/gpu/drm/nouveau/nouveau_state.c
@@ -736,9 +736,11 @@
 			}
 			break;
 		case NV_C0:
-			nvc0_copy_create(dev, 1);
+			if (!(nv_rd32(dev, 0x022500) & 0x00000200))
+				nvc0_copy_create(dev, 1);
 		case NV_D0:
-			nvc0_copy_create(dev, 0);
+			if (!(nv_rd32(dev, 0x022500) & 0x00000100))
+				nvc0_copy_create(dev, 0);
 			break;
 		default:
 			break;
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index f4d4505..2817101 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -258,7 +258,6 @@
 		radeon_crtc->enabled = true;
 		/* adjust pm to dpms changes BEFORE enabling crtcs */
 		radeon_pm_compute_clocks(rdev);
-		/* disable crtc pair power gating before programming */
 		if (ASIC_IS_DCE6(rdev) && !radeon_crtc->in_mode_set)
 			atombios_powergate_crtc(crtc, ATOM_DISABLE);
 		atombios_enable_crtc(crtc, ATOM_ENABLE);
@@ -278,25 +277,8 @@
 			atombios_enable_crtc_memreq(crtc, ATOM_DISABLE);
 		atombios_enable_crtc(crtc, ATOM_DISABLE);
 		radeon_crtc->enabled = false;
-		/* power gating is per-pair */
-		if (ASIC_IS_DCE6(rdev) && !radeon_crtc->in_mode_set) {
-			struct drm_crtc *other_crtc;
-			struct radeon_crtc *other_radeon_crtc;
-			list_for_each_entry(other_crtc, &rdev->ddev->mode_config.crtc_list, head) {
-				other_radeon_crtc = to_radeon_crtc(other_crtc);
-				if (((radeon_crtc->crtc_id == 0) && (other_radeon_crtc->crtc_id == 1)) ||
-				    ((radeon_crtc->crtc_id == 1) && (other_radeon_crtc->crtc_id == 0)) ||
-				    ((radeon_crtc->crtc_id == 2) && (other_radeon_crtc->crtc_id == 3)) ||
-				    ((radeon_crtc->crtc_id == 3) && (other_radeon_crtc->crtc_id == 2)) ||
-				    ((radeon_crtc->crtc_id == 4) && (other_radeon_crtc->crtc_id == 5)) ||
-				    ((radeon_crtc->crtc_id == 5) && (other_radeon_crtc->crtc_id == 4))) {
-					/* if both crtcs in the pair are off, enable power gating */
-					if (other_radeon_crtc->enabled == false)
-						atombios_powergate_crtc(crtc, ATOM_ENABLE);
-					break;
-				}
-			}
-		}
+		if (ASIC_IS_DCE6(rdev) && !radeon_crtc->in_mode_set)
+			atombios_powergate_crtc(crtc, ATOM_ENABLE);
 		/* adjust pm to dpms changes AFTER disabling crtcs */
 		radeon_pm_compute_clocks(rdev);
 		break;
@@ -1682,9 +1664,22 @@
 	struct drm_device *dev = crtc->dev;
 	struct radeon_device *rdev = dev->dev_private;
 	struct radeon_atom_ss ss;
+	int i;
 
 	atombios_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
 
+	for (i = 0; i < rdev->num_crtc; i++) {
+		if (rdev->mode_info.crtcs[i] &&
+		    rdev->mode_info.crtcs[i]->enabled &&
+		    i != radeon_crtc->crtc_id &&
+		    radeon_crtc->pll_id == rdev->mode_info.crtcs[i]->pll_id) {
+			/* one other crtc is using this pll; don't turn
+			 * off the pll
+			 */
+			goto done;
+		}
+	}
+
 	switch (radeon_crtc->pll_id) {
 	case ATOM_PPLL1:
 	case ATOM_PPLL2:
@@ -1701,6 +1696,7 @@
 	default:
 		break;
 	}
+done:
 	radeon_crtc->pll_id = -1;
 }
 
diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c
index 7712cf5..3623b98 100644
--- a/drivers/gpu/drm/radeon/atombios_dp.c
+++ b/drivers/gpu/drm/radeon/atombios_dp.c
@@ -577,30 +577,25 @@
 	struct radeon_device *rdev = dev->dev_private;
 	struct radeon_connector *radeon_connector = to_radeon_connector(connector);
 	int panel_mode = DP_PANEL_MODE_EXTERNAL_DP_MODE;
+	u16 dp_bridge = radeon_connector_encoder_get_dp_bridge_encoder_id(connector);
+	u8 tmp;
 
 	if (!ASIC_IS_DCE4(rdev))
 		return panel_mode;
 
-	if (radeon_connector_encoder_get_dp_bridge_encoder_id(connector) ==
-	    ENCODER_OBJECT_ID_NUTMEG)
-		panel_mode = DP_PANEL_MODE_INTERNAL_DP1_MODE;
-	else if (radeon_connector_encoder_get_dp_bridge_encoder_id(connector) ==
-		 ENCODER_OBJECT_ID_TRAVIS) {
-		u8 id[6];
-		int i;
-		for (i = 0; i < 6; i++)
-			id[i] = radeon_read_dpcd_reg(radeon_connector, 0x503 + i);
-		if (id[0] == 0x73 &&
-		    id[1] == 0x69 &&
-		    id[2] == 0x76 &&
-		    id[3] == 0x61 &&
-		    id[4] == 0x72 &&
-		    id[5] == 0x54)
+	if (dp_bridge != ENCODER_OBJECT_ID_NONE) {
+		/* DP bridge chips */
+		tmp = radeon_read_dpcd_reg(radeon_connector, DP_EDP_CONFIGURATION_CAP);
+		if (tmp & 1)
+			panel_mode = DP_PANEL_MODE_INTERNAL_DP2_MODE;
+		else if ((dp_bridge == ENCODER_OBJECT_ID_NUTMEG) ||
+			 (dp_bridge == ENCODER_OBJECT_ID_TRAVIS))
 			panel_mode = DP_PANEL_MODE_INTERNAL_DP1_MODE;
 		else
-			panel_mode = DP_PANEL_MODE_INTERNAL_DP2_MODE;
+			panel_mode = DP_PANEL_MODE_EXTERNAL_DP_MODE;
 	} else if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) {
-		u8 tmp = radeon_read_dpcd_reg(radeon_connector, DP_EDP_CONFIGURATION_CAP);
+		/* eDP */
+		tmp = radeon_read_dpcd_reg(radeon_connector, DP_EDP_CONFIGURATION_CAP);
 		if (tmp & 1)
 			panel_mode = DP_PANEL_MODE_INTERNAL_DP2_MODE;
 	}
diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c
index f9bc27f..6e8803a 100644
--- a/drivers/gpu/drm/radeon/atombios_encoders.c
+++ b/drivers/gpu/drm/radeon/atombios_encoders.c
@@ -1379,6 +1379,8 @@
 	struct drm_device *dev = encoder->dev;
 	struct radeon_device *rdev = dev->dev_private;
 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	struct drm_encoder *ext_encoder = radeon_get_external_encoder(encoder);
+	struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv;
 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
 	struct radeon_connector *radeon_connector = NULL;
 	struct radeon_connector_atom_dig *radeon_dig_connector = NULL;
@@ -1390,19 +1392,37 @@
 
 	switch (mode) {
 	case DRM_MODE_DPMS_ON:
-		/* some early dce3.2 boards have a bug in their transmitter control table */
-		if ((rdev->family == CHIP_RV710) || (rdev->family == CHIP_RV730) ||
-		    ASIC_IS_DCE41(rdev) || ASIC_IS_DCE5(rdev)) {
-			if (ASIC_IS_DCE6(rdev)) {
-				/* It seems we need to call ATOM_ENCODER_CMD_SETUP again
-				 * before reenabling encoder on DPMS ON, otherwise we never
-				 * get picture
-				 */
-				atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_SETUP, 0);
+		if (ASIC_IS_DCE41(rdev) || ASIC_IS_DCE5(rdev)) {
+			if (!connector)
+				dig->panel_mode = DP_PANEL_MODE_EXTERNAL_DP_MODE;
+			else
+				dig->panel_mode = radeon_dp_get_panel_mode(encoder, connector);
+
+			/* setup and enable the encoder */
+			atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_SETUP, 0);
+			atombios_dig_encoder_setup(encoder,
+						   ATOM_ENCODER_CMD_SETUP_PANEL_MODE,
+						   dig->panel_mode);
+			if (ext_encoder) {
+				if (ASIC_IS_DCE41(rdev) || ASIC_IS_DCE61(rdev))
+					atombios_external_encoder_setup(encoder, ext_encoder,
+									EXTERNAL_ENCODER_ACTION_V3_ENCODER_SETUP);
 			}
 			atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE, 0, 0);
-		} else {
+		} else if (ASIC_IS_DCE4(rdev)) {
+			/* setup and enable the encoder */
+			atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_SETUP, 0);
+			/* enable the transmitter */
+			atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE, 0, 0);
 			atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE_OUTPUT, 0, 0);
+		} else {
+			/* setup and enable the encoder and transmitter */
+			atombios_dig_encoder_setup(encoder, ATOM_ENABLE, 0);
+			atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_SETUP, 0, 0);
+			atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE, 0, 0);
+			/* some early dce3.2 boards have a bug in their transmitter control table */
+			if ((rdev->family != CHIP_RV710) && (rdev->family != CHIP_RV730))
+				atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE_OUTPUT, 0, 0);
 		}
 		if (ENCODER_MODE_IS_DP(atombios_get_encoder_mode(encoder)) && connector) {
 			if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) {
@@ -1420,10 +1440,19 @@
 	case DRM_MODE_DPMS_STANDBY:
 	case DRM_MODE_DPMS_SUSPEND:
 	case DRM_MODE_DPMS_OFF:
-		if (ASIC_IS_DCE41(rdev) || ASIC_IS_DCE5(rdev))
+		if (ASIC_IS_DCE41(rdev) || ASIC_IS_DCE5(rdev)) {
+			/* disable the transmitter */
 			atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_DISABLE, 0, 0);
-		else
+		} else if (ASIC_IS_DCE4(rdev)) {
+			/* disable the transmitter */
 			atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_DISABLE_OUTPUT, 0, 0);
+			atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_DISABLE, 0, 0);
+		} else {
+			/* disable the encoder and transmitter */
+			atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_DISABLE_OUTPUT, 0, 0);
+			atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_DISABLE, 0, 0);
+			atombios_dig_encoder_setup(encoder, ATOM_DISABLE, 0);
+		}
 		if (ENCODER_MODE_IS_DP(atombios_get_encoder_mode(encoder)) && connector) {
 			if (ASIC_IS_DCE4(rdev))
 				atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_DP_VIDEO_OFF, 0);
@@ -1740,13 +1769,34 @@
 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
 	struct drm_encoder *test_encoder;
-	struct radeon_encoder_atom_dig *dig;
+	struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv;
 	uint32_t dig_enc_in_use = 0;
 
-	/* DCE4/5 */
-	if (ASIC_IS_DCE4(rdev)) {
-		dig = radeon_encoder->enc_priv;
-		if (ASIC_IS_DCE41(rdev)) {
+	if (ASIC_IS_DCE6(rdev)) {
+		/* DCE6 */
+		switch (radeon_encoder->encoder_id) {
+		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+			if (dig->linkb)
+				return 1;
+			else
+				return 0;
+			break;
+		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+			if (dig->linkb)
+				return 3;
+			else
+				return 2;
+			break;
+		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+			if (dig->linkb)
+				return 5;
+			else
+				return 4;
+			break;
+		}
+	} else if (ASIC_IS_DCE4(rdev)) {
+		/* DCE4/5 */
+		if (ASIC_IS_DCE41(rdev) && !ASIC_IS_DCE61(rdev)) {
 			/* ontario follows DCE4 */
 			if (rdev->family == CHIP_PALM) {
 				if (dig->linkb)
@@ -1848,10 +1898,12 @@
 	struct drm_device *dev = encoder->dev;
 	struct radeon_device *rdev = dev->dev_private;
 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
-	struct drm_encoder *ext_encoder = radeon_get_external_encoder(encoder);
 
 	radeon_encoder->pixel_clock = adjusted_mode->clock;
 
+	/* need to call this here rather than in prepare() since we need some crtc info */
+	radeon_atom_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
+
 	if (ASIC_IS_AVIVO(rdev) && !ASIC_IS_DCE4(rdev)) {
 		if (radeon_encoder->active_device & (ATOM_DEVICE_CV_SUPPORT | ATOM_DEVICE_TV_SUPPORT))
 			atombios_yuv_setup(encoder, true);
@@ -1870,38 +1922,7 @@
 	case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
 	case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
 	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
-		if (ASIC_IS_DCE41(rdev) || ASIC_IS_DCE5(rdev)) {
-			struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
-			struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv;
-
-			if (!connector)
-				dig->panel_mode = DP_PANEL_MODE_EXTERNAL_DP_MODE;
-			else
-				dig->panel_mode = radeon_dp_get_panel_mode(encoder, connector);
-
-			/* setup and enable the encoder */
-			atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_SETUP, 0);
-			atombios_dig_encoder_setup(encoder,
-						   ATOM_ENCODER_CMD_SETUP_PANEL_MODE,
-						   dig->panel_mode);
-		} else if (ASIC_IS_DCE4(rdev)) {
-			/* disable the transmitter */
-			atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_DISABLE, 0, 0);
-			/* setup and enable the encoder */
-			atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_SETUP, 0);
-
-			/* enable the transmitter */
-			atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE, 0, 0);
-		} else {
-			/* disable the encoder and transmitter */
-			atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_DISABLE, 0, 0);
-			atombios_dig_encoder_setup(encoder, ATOM_DISABLE, 0);
-
-			/* setup and enable the encoder and transmitter */
-			atombios_dig_encoder_setup(encoder, ATOM_ENABLE, 0);
-			atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_SETUP, 0, 0);
-			atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE, 0, 0);
-		}
+		/* handled in dpms */
 		break;
 	case ENCODER_OBJECT_ID_INTERNAL_DDI:
 	case ENCODER_OBJECT_ID_INTERNAL_DVO1:
@@ -1922,14 +1943,6 @@
 		break;
 	}
 
-	if (ext_encoder) {
-		if (ASIC_IS_DCE41(rdev) || ASIC_IS_DCE61(rdev))
-			atombios_external_encoder_setup(encoder, ext_encoder,
-							EXTERNAL_ENCODER_ACTION_V3_ENCODER_SETUP);
-		else
-			atombios_external_encoder_setup(encoder, ext_encoder, ATOM_ENABLE);
-	}
-
 	atombios_apply_encoder_quirks(encoder, adjusted_mode);
 
 	if (atombios_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI) {
@@ -2116,7 +2129,6 @@
 	}
 
 	radeon_atom_output_lock(encoder, true);
-	radeon_atom_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
 
 	if (connector) {
 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
@@ -2137,6 +2149,7 @@
 
 static void radeon_atom_encoder_commit(struct drm_encoder *encoder)
 {
+	/* need to call this here as we need the crtc set up */
 	radeon_atom_encoder_dpms(encoder, DRM_MODE_DPMS_ON);
 	radeon_atom_output_lock(encoder, false);
 }
@@ -2177,14 +2190,7 @@
 	case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
 	case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
 	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
-		if (ASIC_IS_DCE4(rdev))
-			/* disable the transmitter */
-			atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_DISABLE, 0, 0);
-		else {
-			/* disable the encoder and transmitter */
-			atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_DISABLE, 0, 0);
-			atombios_dig_encoder_setup(encoder, ATOM_DISABLE, 0);
-		}
+		/* handled in dpms */
 		break;
 	case ENCODER_OBJECT_ID_INTERNAL_DDI:
 	case ENCODER_OBJECT_ID_INTERNAL_DVO1:
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index ab74e6b..f37676d 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -63,6 +63,7 @@
 	u32			cb_color_size_idx[8]; /* unused */
 	u32			cb_target_mask;
 	u32			cb_shader_mask;  /* unused */
+	bool			is_resolve;
 	u32			cb_color_size[8];
 	u32			vgt_strmout_en;
 	u32			vgt_strmout_buffer_en;
@@ -315,7 +316,15 @@
 		track->cb_color_bo[i] = NULL;
 		track->cb_color_bo_offset[i] = 0xFFFFFFFF;
 		track->cb_color_bo_mc[i] = 0xFFFFFFFF;
+		track->cb_color_frag_bo[i] = NULL;
+		track->cb_color_frag_offset[i] = 0xFFFFFFFF;
+		track->cb_color_tile_bo[i] = NULL;
+		track->cb_color_tile_offset[i] = 0xFFFFFFFF;
+		track->cb_color_mask[i] = 0xFFFFFFFF;
 	}
+	track->is_resolve = false;
+	track->nsamples = 16;
+	track->log_nsamples = 4;
 	track->cb_target_mask = 0xFFFFFFFF;
 	track->cb_shader_mask = 0xFFFFFFFF;
 	track->cb_dirty = true;
@@ -352,6 +361,8 @@
 	volatile u32 *ib = p->ib.ptr;
 	unsigned array_mode;
 	u32 format;
+	/* When resolve is used, the second colorbuffer always has 1 sample. */
+	unsigned nsamples = track->is_resolve && i == 1 ? 1 : track->nsamples;
 
 	size = radeon_bo_size(track->cb_color_bo[i]) - track->cb_color_bo_offset[i];
 	format = G_0280A0_FORMAT(track->cb_color_info[i]);
@@ -375,7 +386,7 @@
 	array_check.group_size = track->group_size;
 	array_check.nbanks = track->nbanks;
 	array_check.npipes = track->npipes;
-	array_check.nsamples = track->nsamples;
+	array_check.nsamples = nsamples;
 	array_check.blocksize = r600_fmt_get_blocksize(format);
 	if (r600_get_array_mode_alignment(&array_check,
 					  &pitch_align, &height_align, &depth_align, &base_align)) {
@@ -421,7 +432,7 @@
 
 	/* check offset */
 	tmp = r600_fmt_get_nblocksy(format, height) * r600_fmt_get_nblocksx(format, pitch) *
-	      r600_fmt_get_blocksize(format) * track->nsamples;
+	      r600_fmt_get_blocksize(format) * nsamples;
 	switch (array_mode) {
 	default:
 	case V_0280A0_ARRAY_LINEAR_GENERAL:
@@ -792,6 +803,12 @@
 	 */
 	if (track->cb_dirty) {
 		tmp = track->cb_target_mask;
+
+		/* We must check both colorbuffers for RESOLVE. */
+		if (track->is_resolve) {
+			tmp |= 0xff;
+		}
+
 		for (i = 0; i < 8; i++) {
 			if ((tmp >> (i * 4)) & 0xF) {
 				/* at least one component is enabled */
@@ -1281,6 +1298,11 @@
 		track->nsamples = 1 << tmp;
 		track->cb_dirty = true;
 		break;
+	case R_028808_CB_COLOR_CONTROL:
+		tmp = G_028808_SPECIAL_OP(radeon_get_ib_value(p, idx));
+		track->is_resolve = tmp == V_028808_SPECIAL_RESOLVE_BOX;
+		track->cb_dirty = true;
+		break;
 	case R_0280A0_CB_COLOR0_INFO:
 	case R_0280A4_CB_COLOR1_INFO:
 	case R_0280A8_CB_COLOR2_INFO:
@@ -1416,7 +1438,7 @@
 	case R_028118_CB_COLOR6_MASK:
 	case R_02811C_CB_COLOR7_MASK:
 		tmp = (reg - R_028100_CB_COLOR0_MASK) / 4;
-		track->cb_color_mask[tmp] = ib[idx];
+		track->cb_color_mask[tmp] = radeon_get_ib_value(p, idx);
 		if (G_0280A0_TILE_MODE(track->cb_color_info[tmp])) {
 			track->cb_dirty = true;
 		}
diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h
index bdb69a6..fa6f370 100644
--- a/drivers/gpu/drm/radeon/r600d.h
+++ b/drivers/gpu/drm/radeon/r600d.h
@@ -66,6 +66,14 @@
 #define	CC_RB_BACKEND_DISABLE				0x98F4
 #define		BACKEND_DISABLE(x)				((x) << 16)
 
+#define R_028808_CB_COLOR_CONTROL			0x28808
+#define   S_028808_SPECIAL_OP(x)                       (((x) & 0x7) << 4)
+#define   G_028808_SPECIAL_OP(x)                       (((x) >> 4) & 0x7)
+#define   C_028808_SPECIAL_OP                          0xFFFFFF8F
+#define     V_028808_SPECIAL_NORMAL                     0x00
+#define     V_028808_SPECIAL_DISABLE                    0x01
+#define     V_028808_SPECIAL_RESOLVE_BOX                0x07
+
 #define	CB_COLOR0_BASE					0x28040
 #define	CB_COLOR1_BASE					0x28044
 #define	CB_COLOR2_BASE					0x28048
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index d2e2438..7a3daeb 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1051,7 +1051,7 @@
 	if (rdev->flags & RADEON_IS_AGP)
 		rdev->need_dma32 = true;
 	if ((rdev->flags & RADEON_IS_PCI) &&
-	    (rdev->family < CHIP_RS400))
+	    (rdev->family <= CHIP_RS740))
 		rdev->need_dma32 = true;
 
 	dma_bits = rdev->need_dma32 ? 32 : 40;
@@ -1346,12 +1346,15 @@
 		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
 			radeon_ring_restore(rdev, &rdev->ring[i],
 					    ring_sizes[i], ring_data[i]);
+			ring_sizes[i] = 0;
+			ring_data[i] = NULL;
 		}
 
 		r = radeon_ib_ring_tests(rdev);
 		if (r) {
 			dev_err(rdev->dev, "ib ring test failed (%d).\n", r);
 			if (saved) {
+				saved = false;
 				radeon_suspend(rdev);
 				goto retry;
 			}
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 27d22d7..8c593ea 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -63,9 +63,10 @@
  *   2.19.0 - r600-eg: MSAA textures
  *   2.20.0 - r600-si: RADEON_INFO_TIMESTAMP query
  *   2.21.0 - r600-r700: FMASK and CMASK
+ *   2.22.0 - r600 only: RESOLVE_BOX allowed
  */
 #define KMS_DRIVER_MAJOR	2
-#define KMS_DRIVER_MINOR	21
+#define KMS_DRIVER_MINOR	22
 #define KMS_DRIVER_PATCHLEVEL	0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 int radeon_driver_unload_kms(struct drm_device *dev);
diff --git a/drivers/gpu/drm/radeon/reg_srcs/r600 b/drivers/gpu/drm/radeon/reg_srcs/r600
index f93e45d..20bfbda 100644
--- a/drivers/gpu/drm/radeon/reg_srcs/r600
+++ b/drivers/gpu/drm/radeon/reg_srcs/r600
@@ -744,7 +744,6 @@
 0x00028C38 CB_CLRCMP_DST
 0x00028C3C CB_CLRCMP_MSK
 0x00028C34 CB_CLRCMP_SRC
-0x00028808 CB_COLOR_CONTROL
 0x0002842C CB_FOG_BLUE
 0x00028428 CB_FOG_GREEN
 0x00028424 CB_FOG_RED
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 8bf8a64..8bcd168 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -996,7 +996,8 @@
 	struct hid_driver *hdrv = hid->driver;
 	int ret;
 
-	hid_dump_input(hid, usage, value);
+	if (!list_empty(&hid->debug_list))
+		hid_dump_input(hid, usage, value);
 
 	if (hdrv && hdrv->event && hid_match_usage(hid, usage)) {
 		ret = hdrv->event(hid, field, usage, value);
@@ -1558,7 +1559,9 @@
 	{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M610X) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LABTEC, USB_DEVICE_ID_LABTEC_WIRELESS_KEYBOARD) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LCPOWER, USB_DEVICE_ID_LCPOWER_LC1000 ) },
- 	{ HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_TPKBD) },
+#if IS_ENABLED(CONFIG_HID_LENOVO_TPKBD)
+	{ HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_TPKBD) },
+#endif
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_MX3000_RECEIVER) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_S510_RECEIVER) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_S510_RECEIVER_2) },
diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c
index 0f9c146..4d524b5 100644
--- a/drivers/hid/hid-logitech-dj.c
+++ b/drivers/hid/hid-logitech-dj.c
@@ -439,7 +439,7 @@
 	struct dj_report *dj_report;
 	int retval;
 
-	dj_report = kzalloc(sizeof(dj_report), GFP_KERNEL);
+	dj_report = kzalloc(sizeof(struct dj_report), GFP_KERNEL);
 	if (!dj_report)
 		return -ENOMEM;
 	dj_report->report_id = REPORT_ID_DJ_SHORT;
@@ -456,7 +456,7 @@
 	struct dj_report *dj_report;
 	int retval;
 
-	dj_report = kzalloc(sizeof(dj_report), GFP_KERNEL);
+	dj_report = kzalloc(sizeof(struct dj_report), GFP_KERNEL);
 	if (!dj_report)
 		return -ENOMEM;
 	dj_report->report_id = REPORT_ID_DJ_SHORT;
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c
index 903eef3..991e85c 100644
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -70,6 +70,7 @@
 	{ USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_AXIS_295, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_TS2700, HID_QUIRK_NOGET },
+	{ USB_VENDOR_ID_MGE, USB_DEVICE_ID_MGE_UPS, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN, HID_QUIRK_NO_INIT_REPORTS },
 	{ USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN1, HID_QUIRK_NO_INIT_REPORTS },
 	{ USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN2, HID_QUIRK_NO_INIT_REPORTS },
diff --git a/drivers/hwmon/asus_atk0110.c b/drivers/hwmon/asus_atk0110.c
index 351d1f4..4ee5789 100644
--- a/drivers/hwmon/asus_atk0110.c
+++ b/drivers/hwmon/asus_atk0110.c
@@ -34,6 +34,12 @@
 		.matches = {
 			DMI_MATCH(DMI_BOARD_NAME, "SABERTOOTH X58")
 		}
+	}, {
+		/* Old interface reads the same sensor for fan0 and fan1 */
+		.ident = "Asus M5A78L",
+		.matches = {
+			DMI_MATCH(DMI_BOARD_NAME, "M5A78L")
+		}
 	},
 	{ }
 };
diff --git a/drivers/input/keyboard/imx_keypad.c b/drivers/input/keyboard/imx_keypad.c
index ff4c0a8..ce68e36 100644
--- a/drivers/input/keyboard/imx_keypad.c
+++ b/drivers/input/keyboard/imx_keypad.c
@@ -358,6 +358,7 @@
 	/* Inhibit KDI and KRI interrupts. */
 	reg_val = readw(keypad->mmio_base + KPSR);
 	reg_val &= ~(KBD_STAT_KRIE | KBD_STAT_KDIE);
+	reg_val |= KBD_STAT_KPKR | KBD_STAT_KPKD;
 	writew(reg_val, keypad->mmio_base + KPSR);
 
 	/* Colums as open drain and disable all rows */
@@ -515,7 +516,9 @@
 	input_set_drvdata(input_dev, keypad);
 
 	/* Ensure that the keypad will stay dormant until opened */
+	clk_enable(keypad->clk);
 	imx_keypad_inhibit(keypad);
+	clk_disable(keypad->clk);
 
 	error = request_irq(irq, imx_keypad_irq_handler, 0,
 			    pdev->name, keypad);
diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
index 5ec774d..6918773 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -177,6 +177,20 @@
 		},
 	},
 	{
+		/* Gigabyte T1005 - defines wrong chassis type ("Other") */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "GIGABYTE"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "T1005"),
+		},
+	},
+	{
+		/* Gigabyte T1005M/P - defines wrong chassis type ("Other") */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "GIGABYTE"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "T1005M/P"),
+		},
+	},
+	{
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
 			DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion dv9700"),
diff --git a/drivers/input/tablet/wacom_wac.c b/drivers/input/tablet/wacom_wac.c
index 0020419..532d067 100644
--- a/drivers/input/tablet/wacom_wac.c
+++ b/drivers/input/tablet/wacom_wac.c
@@ -1848,7 +1848,10 @@
 	{ "Wacom Intuos5 M", WACOM_PKGLEN_INTUOS,  44704, 27940, 2047,
 	  63, INTUOS5, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES };
 static const struct wacom_features wacom_features_0xF4 =
-	{ "Wacom Cintiq 24HD",    WACOM_PKGLEN_INTUOS,   104480, 65600, 2047,
+	{ "Wacom Cintiq 24HD",       WACOM_PKGLEN_INTUOS,   104480, 65600, 2047,
+	  63, WACOM_24HD, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES };
+static const struct wacom_features wacom_features_0xF8 =
+	{ "Wacom Cintiq 24HD touch", WACOM_PKGLEN_INTUOS,   104480, 65600, 2047,
 	  63, WACOM_24HD, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES };
 static const struct wacom_features wacom_features_0x3F =
 	{ "Wacom Cintiq 21UX",    WACOM_PKGLEN_INTUOS,    87200, 65600, 1023,
@@ -2091,6 +2094,7 @@
 	{ USB_DEVICE_WACOM(0xEF) },
 	{ USB_DEVICE_WACOM(0x47) },
 	{ USB_DEVICE_WACOM(0xF4) },
+	{ USB_DEVICE_WACOM(0xF8) },
 	{ USB_DEVICE_WACOM(0xFA) },
 	{ USB_DEVICE_LENOVO(0x6004) },
 	{ }
diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c
index 9afc777..b06a5e3 100644
--- a/drivers/input/touchscreen/edt-ft5x06.c
+++ b/drivers/input/touchscreen/edt-ft5x06.c
@@ -602,6 +602,7 @@
 {
 	if (tsdata->debug_dir)
 		debugfs_remove_recursive(tsdata->debug_dir);
+	kfree(tsdata->raw_buffer);
 }
 
 #else
@@ -843,7 +844,6 @@
 	if (gpio_is_valid(pdata->reset_pin))
 		gpio_free(pdata->reset_pin);
 
-	kfree(tsdata->raw_buffer);
 	kfree(tsdata);
 
 	return 0;
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index f1c84de..172a768 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -1411,7 +1411,8 @@
 		/* complete ongoing async transfer before issuing discard */
 		if (card->host->areq)
 			mmc_blk_issue_rw_rq(mq, NULL);
-		if (req->cmd_flags & REQ_SECURE)
+		if (req->cmd_flags & REQ_SECURE &&
+			!(card->quirks & MMC_QUIRK_SEC_ERASE_TRIM_BROKEN))
 			ret = mmc_blk_issue_secdiscard_rq(mq, req);
 		else
 			ret = mmc_blk_issue_discard_rq(mq, req);
@@ -1716,6 +1717,7 @@
 #define CID_MANFID_SANDISK	0x2
 #define CID_MANFID_TOSHIBA	0x11
 #define CID_MANFID_MICRON	0x13
+#define CID_MANFID_SAMSUNG	0x15
 
 static const struct mmc_fixup blk_fixups[] =
 {
@@ -1752,6 +1754,28 @@
 	MMC_FIXUP(CID_NAME_ANY, CID_MANFID_MICRON, 0x200, add_quirk_mmc,
 		  MMC_QUIRK_LONG_READ_TIME),
 
+	/*
+	 * On these Samsung MoviNAND parts, performing secure erase or
+	 * secure trim can result in unrecoverable corruption due to a
+	 * firmware bug.
+	 */
+	MMC_FIXUP("M8G2FA", CID_MANFID_SAMSUNG, CID_OEMID_ANY, add_quirk_mmc,
+		  MMC_QUIRK_SEC_ERASE_TRIM_BROKEN),
+	MMC_FIXUP("MAG4FA", CID_MANFID_SAMSUNG, CID_OEMID_ANY, add_quirk_mmc,
+		  MMC_QUIRK_SEC_ERASE_TRIM_BROKEN),
+	MMC_FIXUP("MBG8FA", CID_MANFID_SAMSUNG, CID_OEMID_ANY, add_quirk_mmc,
+		  MMC_QUIRK_SEC_ERASE_TRIM_BROKEN),
+	MMC_FIXUP("MCGAFA", CID_MANFID_SAMSUNG, CID_OEMID_ANY, add_quirk_mmc,
+		  MMC_QUIRK_SEC_ERASE_TRIM_BROKEN),
+	MMC_FIXUP("VAL00M", CID_MANFID_SAMSUNG, CID_OEMID_ANY, add_quirk_mmc,
+		  MMC_QUIRK_SEC_ERASE_TRIM_BROKEN),
+	MMC_FIXUP("VYL00M", CID_MANFID_SAMSUNG, CID_OEMID_ANY, add_quirk_mmc,
+		  MMC_QUIRK_SEC_ERASE_TRIM_BROKEN),
+	MMC_FIXUP("KYL00M", CID_MANFID_SAMSUNG, CID_OEMID_ANY, add_quirk_mmc,
+		  MMC_QUIRK_SEC_ERASE_TRIM_BROKEN),
+	MMC_FIXUP("VZL00M", CID_MANFID_SAMSUNG, CID_OEMID_ANY, add_quirk_mmc,
+		  MMC_QUIRK_SEC_ERASE_TRIM_BROKEN),
+
 	END_FIXUP
 };
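(For illustration only, not part of the patch: the hunks above add Samsung
MoviNAND entries to blk_fixups[] and gate the secure-discard path on the
resulting quirk bit.  A minimal standalone model of that gating follows; the
flag values and struct layouts are simplified stand-ins, not the kernel
definitions.)

#include <stdbool.h>
#include <stdio.h>

#define REQ_SECURE                      (1u << 0)   /* stand-in value */
#define MMC_QUIRK_SEC_ERASE_TRIM_BROKEN (1u << 1)   /* stand-in value */

struct card { unsigned int quirks; };
struct req  { unsigned int cmd_flags; };

/* secure discard only when the request asks for it and the card is not
 * on the broken-firmware fixup list; otherwise fall back to plain discard */
static bool use_secure_discard(const struct card *c, const struct req *r)
{
	return (r->cmd_flags & REQ_SECURE) &&
	       !(c->quirks & MMC_QUIRK_SEC_ERASE_TRIM_BROKEN);
}

int main(void)
{
	struct card quirked = { .quirks = MMC_QUIRK_SEC_ERASE_TRIM_BROKEN };
	struct card normal  = { .quirks = 0 };
	struct req  secure  = { .cmd_flags = REQ_SECURE };

	printf("quirked: %d  normal: %d\n",
	       use_secure_discard(&quirked, &secure),   /* 0 -> plain discard */
	       use_secure_discard(&normal, &secure));   /* 1 -> secure discard */
	return 0;
}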
 
diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index 322412c..a53c7c4 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c
@@ -81,6 +81,7 @@
 	bool	has_bad_data_ordering;
 	bool	need_reset_after_xfer;
 	bool	need_blksz_mul_4;
+	bool	need_notbusy_for_read_ops;
 };
 
 struct atmel_mci_dma {
@@ -1625,7 +1626,8 @@
 				__func__);
 			atmci_set_completed(host, EVENT_XFER_COMPLETE);
 
-			if (host->data->flags & MMC_DATA_WRITE) {
+			if (host->caps.need_notbusy_for_read_ops ||
+			   (host->data->flags & MMC_DATA_WRITE)) {
 				atmci_writel(host, ATMCI_IER, ATMCI_NOTBUSY);
 				state = STATE_WAITING_NOTBUSY;
 			} else if (host->mrq->stop) {
@@ -2218,6 +2220,7 @@
 	host->caps.has_bad_data_ordering = 1;
 	host->caps.need_reset_after_xfer = 1;
 	host->caps.need_blksz_mul_4 = 1;
+	host->caps.need_notbusy_for_read_ops = 0;
 
 	/* keep only major version number */
 	switch (version & 0xf00) {
@@ -2238,6 +2241,7 @@
 	case 0x200:
 		host->caps.has_rwproof = 1;
 		host->caps.need_blksz_mul_4 = 0;
+		host->caps.need_notbusy_for_read_ops = 1;
 	case 0x100:
 		host->caps.has_bad_data_ordering = 0;
 		host->caps.need_reset_after_xfer = 0;
diff --git a/drivers/mmc/host/bfin_sdh.c b/drivers/mmc/host/bfin_sdh.c
index 0366617..a17dd73 100644
--- a/drivers/mmc/host/bfin_sdh.c
+++ b/drivers/mmc/host/bfin_sdh.c
@@ -49,13 +49,6 @@
 #define bfin_write_SDH_CFG		bfin_write_RSI_CFG
 #endif
 
-struct dma_desc_array {
-	unsigned long	start_addr;
-	unsigned short	cfg;
-	unsigned short	x_count;
-	short		x_modify;
-} __packed;
-
 struct sdh_host {
 	struct mmc_host		*mmc;
 	spinlock_t		lock;
diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 72dc3cd..af40d22 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -627,6 +627,7 @@
 {
 	struct dw_mci *host = slot->host;
 	u32 div;
+	u32 clk_en_a;
 
 	if (slot->clock != host->current_speed) {
 		div = host->bus_hz / slot->clock;
@@ -659,9 +660,11 @@
 		mci_send_cmd(slot,
 			     SDMMC_CMD_UPD_CLK | SDMMC_CMD_PRV_DAT_WAIT, 0);
 
-		/* enable clock */
-		mci_writel(host, CLKENA, ((SDMMC_CLKEN_ENABLE |
-			   SDMMC_CLKEN_LOW_PWR) << slot->id));
+		/* enable clock; only low power if no SDIO */
+		clk_en_a = SDMMC_CLKEN_ENABLE << slot->id;
+		if (!(mci_readl(host, INTMASK) & SDMMC_INT_SDIO(slot->id)))
+			clk_en_a |= SDMMC_CLKEN_LOW_PWR << slot->id;
+		mci_writel(host, CLKENA, clk_en_a);
 
 		/* inform CIU */
 		mci_send_cmd(slot,
@@ -862,6 +865,30 @@
 	return present;
 }
 
+/*
+ * Disable low power mode.
+ *
+ * Low power mode will stop the card clock when idle.  According to the
+ * description of the CLKENA register we should disable low power mode
+ * for SDIO cards if we need SDIO interrupts to work.
+ *
+ * This function is fast if low power mode is already disabled.
+ */
+static void dw_mci_disable_low_power(struct dw_mci_slot *slot)
+{
+	struct dw_mci *host = slot->host;
+	u32 clk_en_a;
+	const u32 clken_low_pwr = SDMMC_CLKEN_LOW_PWR << slot->id;
+
+	clk_en_a = mci_readl(host, CLKENA);
+
+	if (clk_en_a & clken_low_pwr) {
+		mci_writel(host, CLKENA, clk_en_a & ~clken_low_pwr);
+		mci_send_cmd(slot, SDMMC_CMD_UPD_CLK |
+			     SDMMC_CMD_PRV_DAT_WAIT, 0);
+	}
+}
+
 static void dw_mci_enable_sdio_irq(struct mmc_host *mmc, int enb)
 {
 	struct dw_mci_slot *slot = mmc_priv(mmc);
@@ -871,6 +898,14 @@
 	/* Enable/disable Slot Specific SDIO interrupt */
 	int_mask = mci_readl(host, INTMASK);
 	if (enb) {
+		/*
+		 * Turn off low power mode if it was enabled.  This is a bit of
+		 * a heavy operation and we disable / enable IRQs a lot, so
+		 * we'll leave low power mode disabled and it will get
+		 * re-enabled again in dw_mci_setup_bus().
+		 */
+		dw_mci_disable_low_power(slot);
+
 		mci_writel(host, INTMASK,
 			   (int_mask | SDMMC_INT_SDIO(slot->id)));
 	} else {
@@ -1429,22 +1464,10 @@
 			nbytes += len;
 			remain -= len;
 		} while (remain);
-		sg_miter->consumed = offset;
 
+		sg_miter->consumed = offset;
 		status = mci_readl(host, MINTSTS);
 		mci_writel(host, RINTSTS, SDMMC_INT_RXDR);
-		if (status & DW_MCI_DATA_ERROR_FLAGS) {
-			host->data_status = status;
-			data->bytes_xfered += nbytes;
-			sg_miter_stop(sg_miter);
-			host->sg = NULL;
-			smp_wmb();
-
-			set_bit(EVENT_DATA_ERROR, &host->pending_events);
-
-			tasklet_schedule(&host->tasklet);
-			return;
-		}
 	} while (status & SDMMC_INT_RXDR); /*if the RXDR is ready read again*/
 	data->bytes_xfered += nbytes;
 
@@ -1497,23 +1520,10 @@
 			nbytes += len;
 			remain -= len;
 		} while (remain);
-		sg_miter->consumed = offset;
 
+		sg_miter->consumed = offset;
 		status = mci_readl(host, MINTSTS);
 		mci_writel(host, RINTSTS, SDMMC_INT_TXDR);
-		if (status & DW_MCI_DATA_ERROR_FLAGS) {
-			host->data_status = status;
-			data->bytes_xfered += nbytes;
-			sg_miter_stop(sg_miter);
-			host->sg = NULL;
-
-			smp_wmb();
-
-			set_bit(EVENT_DATA_ERROR, &host->pending_events);
-
-			tasklet_schedule(&host->tasklet);
-			return;
-		}
 	} while (status & SDMMC_INT_TXDR); /* if TXDR write again */
 	data->bytes_xfered += nbytes;
 
@@ -1547,12 +1557,11 @@
 static irqreturn_t dw_mci_interrupt(int irq, void *dev_id)
 {
 	struct dw_mci *host = dev_id;
-	u32 status, pending;
+	u32 pending;
 	unsigned int pass_count = 0;
 	int i;
 
 	do {
-		status = mci_readl(host, RINTSTS);
 		pending = mci_readl(host, MINTSTS); /* read-only mask reg */
 
 		/*
@@ -1570,7 +1579,7 @@
 
 		if (pending & DW_MCI_CMD_ERROR_FLAGS) {
 			mci_writel(host, RINTSTS, DW_MCI_CMD_ERROR_FLAGS);
-			host->cmd_status = status;
+			host->cmd_status = pending;
 			smp_wmb();
 			set_bit(EVENT_CMD_COMPLETE, &host->pending_events);
 		}
@@ -1578,18 +1587,16 @@
 		if (pending & DW_MCI_DATA_ERROR_FLAGS) {
 			/* if there is an error report DATA_ERROR */
 			mci_writel(host, RINTSTS, DW_MCI_DATA_ERROR_FLAGS);
-			host->data_status = status;
+			host->data_status = pending;
 			smp_wmb();
 			set_bit(EVENT_DATA_ERROR, &host->pending_events);
-			if (!(pending & (SDMMC_INT_DTO | SDMMC_INT_DCRC |
-					 SDMMC_INT_SBE | SDMMC_INT_EBE)))
-				tasklet_schedule(&host->tasklet);
+			tasklet_schedule(&host->tasklet);
 		}
 
 		if (pending & SDMMC_INT_DATA_OVER) {
 			mci_writel(host, RINTSTS, SDMMC_INT_DATA_OVER);
 			if (!host->data_status)
-				host->data_status = status;
+				host->data_status = pending;
 			smp_wmb();
 			if (host->dir_status == DW_MCI_RECV_STATUS) {
 				if (host->sg != NULL)
@@ -1613,7 +1620,7 @@
 
 		if (pending & SDMMC_INT_CMD_DONE) {
 			mci_writel(host, RINTSTS, SDMMC_INT_CMD_DONE);
-			dw_mci_cmd_interrupt(host, status);
+			dw_mci_cmd_interrupt(host, pending);
 		}
 
 		if (pending & SDMMC_INT_CD) {
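(For illustration only, not part of the patch: the CLKENA change above keeps
the per-slot low-power clock-gating bit only while the slot's SDIO interrupt
is masked, since a gated card clock would delay SDIO IRQ delivery.  A minimal
standalone model follows; the bit positions are simplified stand-ins, not the
controller's actual register layout.)

#include <stdio.h>

#define SDMMC_CLKEN_ENABLE(id)  (1u << (id))        /* stand-in bit layout */
#define SDMMC_CLKEN_LOW_PWR(id) (1u << (16 + (id)))
#define SDMMC_INT_SDIO(id)      (1u << (16 + (id)))

/* compute the CLKENA value for one slot from the current interrupt mask */
static unsigned int clkena_for_slot(int id, unsigned int intmask)
{
	unsigned int clk_en_a = SDMMC_CLKEN_ENABLE(id);

	/* keep low power (clock gating) only when no SDIO IRQ is enabled */
	if (!(intmask & SDMMC_INT_SDIO(id)))
		clk_en_a |= SDMMC_CLKEN_LOW_PWR(id);
	return clk_en_a;
}

int main(void)
{
	printf("no SDIO irq: 0x%x  SDIO irq on: 0x%x\n",
	       clkena_for_slot(0, 0),
	       clkena_for_slot(0, SDMMC_INT_SDIO(0)));
	return 0;
}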
diff --git a/drivers/mmc/host/mxs-mmc.c b/drivers/mmc/host/mxs-mmc.c
index a51f930..ad3fcea 100644
--- a/drivers/mmc/host/mxs-mmc.c
+++ b/drivers/mmc/host/mxs-mmc.c
@@ -285,11 +285,11 @@
 	writel(stat & MXS_MMC_IRQ_BITS,
 	       host->base + HW_SSP_CTRL1(host) + STMP_OFFSET_REG_CLR);
 
+	spin_unlock(&host->lock);
+
 	if ((stat & BM_SSP_CTRL1_SDIO_IRQ) && (stat & BM_SSP_CTRL1_SDIO_IRQ_EN))
 		mmc_signal_sdio_irq(host->mmc);
 
-	spin_unlock(&host->lock);
-
 	if (stat & BM_SSP_CTRL1_RESP_TIMEOUT_IRQ)
 		cmd->error = -ETIMEDOUT;
 	else if (stat & BM_SSP_CTRL1_RESP_ERR_IRQ)
@@ -644,11 +644,6 @@
 		       host->base + HW_SSP_CTRL0 + STMP_OFFSET_REG_SET);
 		writel(BM_SSP_CTRL1_SDIO_IRQ_EN,
 		       host->base + HW_SSP_CTRL1(host) + STMP_OFFSET_REG_SET);
-
-		if (readl(host->base + HW_SSP_STATUS(host)) &
-				BM_SSP_STATUS_SDIO_IRQ)
-			mmc_signal_sdio_irq(host->mmc);
-
 	} else {
 		writel(BM_SSP_CTRL0_SDIO_IRQ_CHECK,
 		       host->base + HW_SSP_CTRL0 + STMP_OFFSET_REG_CLR);
@@ -657,6 +652,11 @@
 	}
 
 	spin_unlock_irqrestore(&host->lock, flags);
+
+	if (enable && readl(host->base + HW_SSP_STATUS(host)) &
+			BM_SSP_STATUS_SDIO_IRQ)
+		mmc_signal_sdio_irq(host->mmc);
+
 }
 
 static const struct mmc_host_ops mxs_mmc_ops = {
diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c
index 50e08f0..a5999a7 100644
--- a/drivers/mmc/host/omap.c
+++ b/drivers/mmc/host/omap.c
@@ -668,7 +668,7 @@
 static void
 mmc_omap_xfer_data(struct mmc_omap_host *host, int write)
 {
-	int n;
+	int n, nwords;
 
 	if (host->buffer_bytes_left == 0) {
 		host->sg_idx++;
@@ -678,15 +678,23 @@
 	n = 64;
 	if (n > host->buffer_bytes_left)
 		n = host->buffer_bytes_left;
+
+	nwords = n / 2;
+	nwords += n & 1; /* handle odd number of bytes to transfer */
+
 	host->buffer_bytes_left -= n;
 	host->total_bytes_left -= n;
 	host->data->bytes_xfered += n;
 
 	if (write) {
-		__raw_writesw(host->virt_base + OMAP_MMC_REG(host, DATA), host->buffer, n);
+		__raw_writesw(host->virt_base + OMAP_MMC_REG(host, DATA),
+			      host->buffer, nwords);
 	} else {
-		__raw_readsw(host->virt_base + OMAP_MMC_REG(host, DATA), host->buffer, n);
+		__raw_readsw(host->virt_base + OMAP_MMC_REG(host, DATA),
+			     host->buffer, nwords);
 	}
+
+	host->buffer += nwords;
 }
 
 static inline void mmc_omap_report_irq(u16 status)
diff --git a/drivers/mmc/host/sdhci-esdhc.h b/drivers/mmc/host/sdhci-esdhc.h
index b97b2f5..d25f9ab 100644
--- a/drivers/mmc/host/sdhci-esdhc.h
+++ b/drivers/mmc/host/sdhci-esdhc.h
@@ -48,14 +48,14 @@
 	int div = 1;
 	u32 temp;
 
+	if (clock == 0)
+		goto out;
+
 	temp = sdhci_readl(host, ESDHC_SYSTEM_CONTROL);
 	temp &= ~(ESDHC_CLOCK_IPGEN | ESDHC_CLOCK_HCKEN | ESDHC_CLOCK_PEREN
 		| ESDHC_CLOCK_MASK);
 	sdhci_writel(host, temp, ESDHC_SYSTEM_CONTROL);
 
-	if (clock == 0)
-		goto out;
-
 	while (host->max_clk / pre_div / 16 > clock && pre_div < 256)
 		pre_div *= 2;
 
diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c
index 437bc19..568307c 100644
--- a/drivers/mtd/ubi/vtbl.c
+++ b/drivers/mtd/ubi/vtbl.c
@@ -340,7 +340,7 @@
 	 * of this LEB as it will be deleted and freed in 'ubi_add_to_av()'.
 	 */
 	err = ubi_add_to_av(ubi, ai, new_aeb->pnum, new_aeb->ec, vid_hdr, 0);
-	kfree(new_aeb);
+	kmem_cache_free(ai->aeb_slab_cache, new_aeb);
 	ubi_free_vid_hdr(ubi, vid_hdr);
 	return err;
 
@@ -353,7 +353,7 @@
 		list_add(&new_aeb->u.list, &ai->erase);
 		goto retry;
 	}
-	kfree(new_aeb);
+	kmem_cache_free(ai->aeb_slab_cache, new_aeb);
 out_free:
 	ubi_free_vid_hdr(ubi, vid_hdr);
 	return err;
diff --git a/drivers/net/can/sja1000/sja1000_platform.c b/drivers/net/can/sja1000/sja1000_platform.c
index 4f50145..662c5f7 100644
--- a/drivers/net/can/sja1000/sja1000_platform.c
+++ b/drivers/net/can/sja1000/sja1000_platform.c
@@ -109,7 +109,9 @@
 	priv = netdev_priv(dev);
 
 	dev->irq = res_irq->start;
-	priv->irq_flags = res_irq->flags & (IRQF_TRIGGER_MASK | IRQF_SHARED);
+	priv->irq_flags = res_irq->flags & IRQF_TRIGGER_MASK;
+	if (res_irq->flags & IORESOURCE_IRQ_SHAREABLE)
+		priv->irq_flags |= IRQF_SHARED;
 	priv->reg_base = addr;
 	/* The CAN clock frequency is half the oscillator clock frequency */
 	priv->can.clock.freq = pdata->osc_freq / 2;
diff --git a/drivers/net/can/softing/softing_fw.c b/drivers/net/can/softing/softing_fw.c
index 3105961..b595d34 100644
--- a/drivers/net/can/softing/softing_fw.c
+++ b/drivers/net/can/softing/softing_fw.c
@@ -150,7 +150,7 @@
 	const uint8_t *mem, *end, *dat;
 	uint16_t type, len;
 	uint32_t addr;
-	uint8_t *buf = NULL;
+	uint8_t *buf = NULL, *new_buf;
 	int buflen = 0;
 	int8_t type_end = 0;
 
@@ -199,11 +199,12 @@
 		if (len > buflen) {
 			/* align buflen */
 			buflen = (len + (1024-1)) & ~(1024-1);
-			buf = krealloc(buf, buflen, GFP_KERNEL);
-			if (!buf) {
+			new_buf = krealloc(buf, buflen, GFP_KERNEL);
+			if (!new_buf) {
 				ret = -ENOMEM;
 				goto failed;
 			}
+			buf = new_buf;
 		}
 		/* verify record data */
 		memcpy_fromio(buf, &dpram[addr + offset], len);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index 463b9ec..6d1a24a 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -1708,9 +1708,6 @@
 			continue;		\
 		else
 
-#define for_each_napi_rx_queue(bp, var) \
-	for ((var) = 0; (var) < bp->num_napi_queues; (var)++)
-
 /* Skip OOO FP */
 #define for_each_tx_queue(bp, var) \
 	for ((var) = 0; (var) < BNX2X_NUM_QUEUES(bp); (var)++) \
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index e879e19..af20c6e 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -2046,6 +2046,8 @@
 	 */
 	bnx2x_setup_tc(bp->dev, bp->max_cos);
 
+	/* Add all NAPI objects */
+	bnx2x_add_all_napi(bp);
 	bnx2x_napi_enable(bp);
 
 	/* set pf load just before approaching the MCP */
@@ -2408,6 +2410,8 @@
 
 		/* Disable HW interrupts, NAPI */
 		bnx2x_netif_stop(bp, 1);
+		/* Delete all NAPI objects */
+		bnx2x_del_all_napi(bp);
 
 		/* Release IRQs */
 		bnx2x_free_irq(bp);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index dfa757e..21b5532 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -792,7 +792,7 @@
 	bp->num_napi_queues = bp->num_queues;
 
 	/* Add NAPI objects */
-	for_each_napi_rx_queue(bp, i)
+	for_each_rx_queue(bp, i)
 		netif_napi_add(bp->dev, &bnx2x_fp(bp, i, napi),
 			       bnx2x_poll, BNX2X_NAPI_WEIGHT);
 }
@@ -801,7 +801,7 @@
 {
 	int i;
 
-	for_each_napi_rx_queue(bp, i)
+	for_each_rx_queue(bp, i)
 		netif_napi_del(&bnx2x_fp(bp, i, napi));
 }
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
index fc4e0e3..c37a68d 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
@@ -2888,11 +2888,9 @@
  */
 static void bnx2x_change_num_queues(struct bnx2x *bp, int num_rss)
 {
-	bnx2x_del_all_napi(bp);
 	bnx2x_disable_msi(bp);
 	BNX2X_NUM_QUEUES(bp) = num_rss + NON_ETH_CONTEXT_USE;
 	bnx2x_set_int_mode(bp);
-	bnx2x_add_all_napi(bp);
 }
 
 /**
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 02b5a34..2105498 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -8427,6 +8427,8 @@
 
 	/* Disable HW interrupts, NAPI */
 	bnx2x_netif_stop(bp, 1);
+	/* Delete all NAPI objects */
+	bnx2x_del_all_napi(bp);
 
 	/* Release IRQs */
 	bnx2x_free_irq(bp);
@@ -11229,10 +11231,12 @@
 static void poll_bnx2x(struct net_device *dev)
 {
 	struct bnx2x *bp = netdev_priv(dev);
+	int i;
 
-	disable_irq(bp->pdev->irq);
-	bnx2x_interrupt(bp->pdev->irq, dev);
-	enable_irq(bp->pdev->irq);
+	for_each_eth_queue(bp, i) {
+		struct bnx2x_fastpath *fp = &bp->fp[i];
+		napi_schedule(&bnx2x_fp(bp, fp->index, napi));
+	}
 }
 #endif
 
@@ -11899,9 +11903,6 @@
 	 */
 	bnx2x_set_int_mode(bp);
 
-	/* Add all NAPI objects */
-	bnx2x_add_all_napi(bp);
-
 	rc = register_netdev(dev);
 	if (rc) {
 		dev_err(&pdev->dev, "Cannot register net device\n");
@@ -11976,9 +11977,6 @@
 
 	unregister_netdev(dev);
 
-	/* Delete all NAPI objects */
-	bnx2x_del_all_napi(bp);
-
 	/* Power on: we can't let PCI layer write to us while we are in D3 */
 	bnx2x_set_power_state(bp, PCI_D0);
 
@@ -12025,6 +12023,8 @@
 	bnx2x_tx_disable(bp);
 
 	bnx2x_netif_stop(bp, 0);
+	/* Delete all NAPI objects */
+	bnx2x_del_all_napi(bp);
 
 	del_timer_sync(&bp->timer);
 
diff --git a/drivers/net/ethernet/cirrus/cs89x0.c b/drivers/net/ethernet/cirrus/cs89x0.c
index 845b202..1384469 100644
--- a/drivers/net/ethernet/cirrus/cs89x0.c
+++ b/drivers/net/ethernet/cirrus/cs89x0.c
@@ -1243,6 +1243,7 @@
 {
 	struct net_local *lp = netdev_priv(dev);
 	unsigned long flags;
+	u16 cfg;
 
 	spin_lock_irqsave(&lp->lock, flags);
 	if (dev->flags & IFF_PROMISC)
@@ -1260,11 +1261,10 @@
 	/* in promiscuous mode, we accept errored packets,
 	 * so we have to enable interrupts on them also
 	 */
-	writereg(dev, PP_RxCFG,
-		 (lp->curr_rx_cfg |
-		  (lp->rx_mode == RX_ALL_ACCEPT)
-		  ? (RX_CRC_ERROR_ENBL | RX_RUNT_ENBL | RX_EXTRA_DATA_ENBL)
-		  : 0));
+	cfg = lp->curr_rx_cfg;
+	if (lp->rx_mode == RX_ALL_ACCEPT)
+		cfg |= RX_CRC_ERROR_ENBL | RX_RUNT_ENBL | RX_EXTRA_DATA_ENBL;
+	writereg(dev, PP_RxCFG, cfg);
 	spin_unlock_irqrestore(&lp->lock, flags);
 }
 
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 7fac97b..8c63d06 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -259,7 +259,7 @@
 	int num = 0, status = 0;
 	struct be_mcc_obj *mcc_obj = &adapter->mcc_obj;
 
-	spin_lock_bh(&adapter->mcc_cq_lock);
+	spin_lock(&adapter->mcc_cq_lock);
 	while ((compl = be_mcc_compl_get(adapter))) {
 		if (compl->flags & CQE_FLAGS_ASYNC_MASK) {
 			/* Interpret flags as an async trailer */
@@ -280,7 +280,7 @@
 	if (num)
 		be_cq_notify(adapter, mcc_obj->cq.id, mcc_obj->rearm_cq, num);
 
-	spin_unlock_bh(&adapter->mcc_cq_lock);
+	spin_unlock(&adapter->mcc_cq_lock);
 	return status;
 }
 
@@ -295,7 +295,9 @@
 		if (be_error(adapter))
 			return -EIO;
 
+		local_bh_disable();
 		status = be_process_mcc(adapter);
+		local_bh_enable();
 
 		if (atomic_read(&mcc_obj->q.used) == 0)
 			break;
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 90a903d8..78b8aa8 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -3763,7 +3763,9 @@
 	/* when interrupts are not yet enabled, just reap any pending
 	* mcc completions */
 	if (!netif_running(adapter->netdev)) {
+		local_bh_disable();
 		be_process_mcc(adapter);
+		local_bh_enable();
 		goto reschedule;
 	}
 
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 4605f72..d3233f5 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -1041,7 +1041,7 @@
 
 	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_VLAN) {
 		dev->hw_features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX;
-		dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX;
+		dev->features |= NETIF_F_HW_VLAN_RX;
 	}
 
 	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_EXTENDED_HASH) {
diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h
index cd15332..cb3356c 100644
--- a/drivers/net/ethernet/intel/e1000e/e1000.h
+++ b/drivers/net/ethernet/intel/e1000e/e1000.h
@@ -310,6 +310,7 @@
 	 */
 	struct e1000_ring *tx_ring /* One per active queue */
 						____cacheline_aligned_in_smp;
+	u32 tx_fifo_limit;
 
 	struct napi_struct napi;
 
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 46c3b1f..d01a099 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -3517,6 +3517,15 @@
 	}
 
 	/*
+	 * Alignment of Tx data is on an arbitrary byte boundary with the
+	 * maximum size per Tx descriptor limited only to the transmit
+	 * allocation of the packet buffer minus 96 bytes with an upper
+	 * limit of 24KB due to receive synchronization limitations.
+	 */
+	adapter->tx_fifo_limit = min_t(u32, ((er32(PBA) >> 16) << 10) - 96,
+				       24 << 10);
+
+	/*
 	 * Disable Adaptive Interrupt Moderation if 2 full packets cannot
 	 * fit in receive buffer.
 	 */
@@ -4785,12 +4794,9 @@
 	return 1;
 }
 
-#define E1000_MAX_PER_TXD	8192
-#define E1000_MAX_TXD_PWR	12
-
 static int e1000_tx_map(struct e1000_ring *tx_ring, struct sk_buff *skb,
 			unsigned int first, unsigned int max_per_txd,
-			unsigned int nr_frags, unsigned int mss)
+			unsigned int nr_frags)
 {
 	struct e1000_adapter *adapter = tx_ring->adapter;
 	struct pci_dev *pdev = adapter->pdev;
@@ -5023,20 +5029,19 @@
 
 static int e1000_maybe_stop_tx(struct e1000_ring *tx_ring, int size)
 {
+	BUG_ON(size > tx_ring->count);
+
 	if (e1000_desc_unused(tx_ring) >= size)
 		return 0;
 	return __e1000_maybe_stop_tx(tx_ring, size);
 }
 
-#define TXD_USE_COUNT(S, X) (((S) >> (X)) + 1)
 static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
 				    struct net_device *netdev)
 {
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 	struct e1000_ring *tx_ring = adapter->tx_ring;
 	unsigned int first;
-	unsigned int max_per_txd = E1000_MAX_PER_TXD;
-	unsigned int max_txd_pwr = E1000_MAX_TXD_PWR;
 	unsigned int tx_flags = 0;
 	unsigned int len = skb_headlen(skb);
 	unsigned int nr_frags;
@@ -5056,18 +5061,8 @@
 	}
 
 	mss = skb_shinfo(skb)->gso_size;
-	/*
-	 * The controller does a simple calculation to
-	 * make sure there is enough room in the FIFO before
-	 * initiating the DMA for each buffer.  The calc is:
-	 * 4 = ceil(buffer len/mss).  To make sure we don't
-	 * overrun the FIFO, adjust the max buffer len if mss
-	 * drops.
-	 */
 	if (mss) {
 		u8 hdr_len;
-		max_per_txd = min(mss << 2, max_per_txd);
-		max_txd_pwr = fls(max_per_txd) - 1;
 
 		/*
 		 * TSO Workaround for 82571/2/3 Controllers -- if skb->data
@@ -5097,12 +5092,12 @@
 		count++;
 	count++;
 
-	count += TXD_USE_COUNT(len, max_txd_pwr);
+	count += DIV_ROUND_UP(len, adapter->tx_fifo_limit);
 
 	nr_frags = skb_shinfo(skb)->nr_frags;
 	for (f = 0; f < nr_frags; f++)
-		count += TXD_USE_COUNT(skb_frag_size(&skb_shinfo(skb)->frags[f]),
-				       max_txd_pwr);
+		count += DIV_ROUND_UP(skb_frag_size(&skb_shinfo(skb)->frags[f]),
+				      adapter->tx_fifo_limit);
 
 	if (adapter->hw.mac.tx_pkt_filtering)
 		e1000_transfer_dhcp_info(adapter, skb);
@@ -5144,15 +5139,18 @@
 		tx_flags |= E1000_TX_FLAGS_NO_FCS;
 
 	/* if count is 0 then mapping error has occurred */
-	count = e1000_tx_map(tx_ring, skb, first, max_per_txd, nr_frags, mss);
+	count = e1000_tx_map(tx_ring, skb, first, adapter->tx_fifo_limit,
+			     nr_frags);
 	if (count) {
 		skb_tx_timestamp(skb);
 
 		netdev_sent_queue(netdev, skb->len);
 		e1000_tx_queue(tx_ring, tx_flags, count);
 		/* Make sure there is space in the ring for the next send. */
-		e1000_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 2);
-
+		e1000_maybe_stop_tx(tx_ring,
+				    (MAX_SKB_FRAGS *
+				     DIV_ROUND_UP(PAGE_SIZE,
+						  adapter->tx_fifo_limit) + 2));
 	} else {
 		dev_kfree_skb_any(skb);
 		tx_ring->buffer_info[first].time_stamp = 0;
@@ -6327,8 +6325,8 @@
 	adapter->hw.phy.autoneg_advertised = 0x2f;
 
 	/* ring size defaults */
-	adapter->rx_ring->count = 256;
-	adapter->tx_ring->count = 256;
+	adapter->rx_ring->count = E1000_DEFAULT_RXD;
+	adapter->tx_ring->count = E1000_DEFAULT_TXD;
 
 	/*
 	 * Initial Wake on LAN setting - If APM wake is enabled in
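(For illustration only, not part of the patch: the per-descriptor limit added
above replaces the fixed 8192-byte/power-of-two scheme with a limit derived
from the Tx packet buffer allocation, and descriptor counts are estimated with
DIV_ROUND_UP() against that limit.  A minimal standalone model follows; the
PBA readout and payload length are hypothetical values.)

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	unsigned int pba = 0x00140014;              /* hypothetical PBA register readout */
	unsigned int tx_alloc = (pba >> 16) << 10;  /* Tx packet buffer allocation, bytes */
	unsigned int limit = tx_alloc - 96;

	if (limit > (24 << 10))
		limit = 24 << 10;                   /* 24KB cap noted in the comment */

	unsigned int head_len = 60000;              /* hypothetical TSO head length */
	printf("tx_fifo_limit=%u  descriptors for %u bytes: %u\n",
	       limit, head_len, DIV_ROUND_UP(head_len, limit));
	return 0;
}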
diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c
index 8cba2df..5faedd8 100644
--- a/drivers/net/ethernet/sfc/ethtool.c
+++ b/drivers/net/ethernet/sfc/ethtool.c
@@ -863,8 +863,8 @@
 				       &ip_entry->ip4dst, &ip_entry->pdst);
 	if (rc != 0) {
 		rc = efx_filter_get_ipv4_full(
-			&spec, &proto, &ip_entry->ip4src, &ip_entry->psrc,
-			&ip_entry->ip4dst, &ip_entry->pdst);
+			&spec, &proto, &ip_entry->ip4dst, &ip_entry->pdst,
+			&ip_entry->ip4src, &ip_entry->psrc);
 		EFX_WARN_ON_PARANOID(rc);
 		ip_mask->ip4src = ~0;
 		ip_mask->psrc = ~0;
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index e2d0832..719be39 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -22,6 +22,9 @@
   Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *******************************************************************************/
 
+#ifndef __COMMON_H__
+#define __COMMON_H__
+
 #include <linux/etherdevice.h>
 #include <linux/netdevice.h>
 #include <linux/phy.h>
@@ -366,3 +369,5 @@
 
 extern void dwmac_dma_flush_tx_fifo(void __iomem *ioaddr);
 extern const struct stmmac_ring_mode_ops ring_mode_ops;
+
+#endif /* __COMMON_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/descs.h b/drivers/net/ethernet/stmicro/stmmac/descs.h
index 9820ec8..223adf9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/descs.h
+++ b/drivers/net/ethernet/stmicro/stmmac/descs.h
@@ -20,6 +20,10 @@
 
   Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *******************************************************************************/
+
+#ifndef __DESCS_H__
+#define __DESCS_H__
+
 struct dma_desc {
 	/* Receive descriptor */
 	union {
@@ -166,3 +170,5 @@
 					 * is not calculated */
 	cic_full = 3,		/* IP header and pseudoheader */
 };
+
+#endif /* __DESCS_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/descs_com.h b/drivers/net/ethernet/stmicro/stmmac/descs_com.h
index dd8d6e1..7ee9499 100644
--- a/drivers/net/ethernet/stmicro/stmmac/descs_com.h
+++ b/drivers/net/ethernet/stmicro/stmmac/descs_com.h
@@ -27,6 +27,9 @@
   Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *******************************************************************************/
 
+#ifndef __DESC_COM_H__
+#define __DESC_COM_H__
+
 #if defined(CONFIG_STMMAC_RING)
 static inline void ehn_desc_rx_set_on_ring_chain(struct dma_desc *p, int end)
 {
@@ -124,3 +127,5 @@
 	p->des01.tx.buffer1_size = len;
 }
 #endif
+
+#endif /* __DESC_COM_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100.h b/drivers/net/ethernet/stmicro/stmmac/dwmac100.h
index 7c6d857..2ec6aea 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac100.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100.h
@@ -22,6 +22,9 @@
   Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *******************************************************************************/
 
+#ifndef __DWMAC100_H__
+#define __DWMAC100_H__
+
 #include <linux/phy.h>
 #include "common.h"
 
@@ -119,3 +122,5 @@
 #define DMA_MISSED_FRAME_M_CNTR	0x0000ffff	/* Missed Frame Couinter */
 
 extern const struct stmmac_dma_ops dwmac100_dma_ops;
+
+#endif /* __DWMAC100_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
index f90fcb5..0e4cace 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
@@ -19,6 +19,8 @@
 
   Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *******************************************************************************/
+#ifndef __DWMAC1000_H__
+#define __DWMAC1000_H__
 
 #include <linux/phy.h>
 #include "common.h"
@@ -229,6 +231,7 @@
 #define GMAC_MMC_RX_CSUM_OFFLOAD   0x208
 
 /* Synopsys Core versions */
-#define	DWMAC_CORE_3_40	34
+#define	DWMAC_CORE_3_40	0x34
 
 extern const struct stmmac_dma_ops dwmac1000_dma_ops;
+#endif /* __DWMAC1000_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
index e678ce3..e49c9a0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
@@ -22,6 +22,9 @@
   Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *******************************************************************************/
 
+#ifndef __DWMAC_DMA_H__
+#define __DWMAC_DMA_H__
+
 /* DMA CRS Control and Status Register Mapping */
 #define DMA_BUS_MODE		0x00001000	/* Bus Mode */
 #define DMA_XMT_POLL_DEMAND	0x00001004	/* Transmit Poll Demand */
@@ -109,3 +112,5 @@
 extern void dwmac_dma_stop_rx(void __iomem *ioaddr);
 extern int dwmac_dma_interrupt(void __iomem *ioaddr,
 				struct stmmac_extra_stats *x);
+
+#endif /* __DWMAC_DMA_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc.h b/drivers/net/ethernet/stmicro/stmmac/mmc.h
index a3835202..67995ef 100644
--- a/drivers/net/ethernet/stmicro/stmmac/mmc.h
+++ b/drivers/net/ethernet/stmicro/stmmac/mmc.h
@@ -22,6 +22,9 @@
   Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *******************************************************************************/
 
+#ifndef __MMC_H__
+#define __MMC_H__
+
 /* MMC control register */
 /* When set, all counter are reset */
 #define MMC_CNTRL_COUNTER_RESET		0x1
@@ -129,3 +132,5 @@
 extern void dwmac_mmc_ctrl(void __iomem *ioaddr, unsigned int mode);
 extern void dwmac_mmc_intr_all_mask(void __iomem *ioaddr);
 extern void dwmac_mmc_read(void __iomem *ioaddr, struct stmmac_counters *mmc);
+
+#endif /* __MMC_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
index c07cfe9..0c74a70 100644
--- a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
@@ -33,7 +33,7 @@
 #define MMC_TX_INTR		0x00000108	/* MMC TX Interrupt */
 #define MMC_RX_INTR_MASK	0x0000010c	/* MMC Interrupt Mask */
 #define MMC_TX_INTR_MASK	0x00000110	/* MMC Interrupt Mask */
-#define MMC_DEFAUL_MASK		0xffffffff
+#define MMC_DEFAULT_MASK		0xffffffff
 
 /* MMC TX counter registers */
 
@@ -147,8 +147,8 @@
 /* To mask all all interrupts.*/
 void dwmac_mmc_intr_all_mask(void __iomem *ioaddr)
 {
-	writel(MMC_DEFAUL_MASK, ioaddr + MMC_RX_INTR_MASK);
-	writel(MMC_DEFAUL_MASK, ioaddr + MMC_TX_INTR_MASK);
+	writel(MMC_DEFAULT_MASK, ioaddr + MMC_RX_INTR_MASK);
+	writel(MMC_DEFAULT_MASK, ioaddr + MMC_TX_INTR_MASK);
 }
 
 /* This reads the MAC core counters (if actaully supported).
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index f2d3665..e872e1d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -20,6 +20,9 @@
   Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *******************************************************************************/
 
+#ifndef __STMMAC_H__
+#define __STMMAC_H__
+
 #define STMMAC_RESOURCE_NAME   "stmmaceth"
 #define DRV_MODULE_VERSION	"March_2012"
 
@@ -166,3 +169,5 @@
 {
 }
 #endif /* CONFIG_STMMAC_PCI */
+
+#endif /* __STMMAC_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_timer.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_timer.h
index 6863590..aea9b14 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_timer.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_timer.h
@@ -21,6 +21,8 @@
 
   Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *******************************************************************************/
+#ifndef __STMMAC_TIMER_H__
+#define __STMMAC_TIMER_H__
 
 struct stmmac_timer {
 	void (*timer_start) (unsigned int new_freq);
@@ -40,3 +42,5 @@
 extern int tmu2_register_user(void *fnt, void *data);
 extern void tmu2_unregister_user(void);
 #endif
+
+#endif /* __STMMAC_TIMER_H__ */
diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c
index cd7ee20..a9ca4a0 100644
--- a/drivers/net/ethernet/ti/davinci_mdio.c
+++ b/drivers/net/ethernet/ti/davinci_mdio.c
@@ -394,8 +394,10 @@
 	struct device *dev = &pdev->dev;
 	struct davinci_mdio_data *data = dev_get_drvdata(dev);
 
-	if (data->bus)
+	if (data->bus) {
+		mdiobus_unregister(data->bus);
 		mdiobus_free(data->bus);
+	}
 
 	if (data->clk)
 		clk_put(data->clk);
diff --git a/drivers/net/fddi/skfp/pmf.c b/drivers/net/fddi/skfp/pmf.c
index 24d8566..441b4dc 100644
--- a/drivers/net/fddi/skfp/pmf.c
+++ b/drivers/net/fddi/skfp/pmf.c
@@ -673,7 +673,7 @@
 			sm_pm_get_ls(smc,port_to_mib(smc,port))) ;
 		break ;
 	case SMT_P_REASON :
-		* (u_long *) to = 0 ;
+		*(u32 *)to = 0 ;
 		sp_len = 4 ;
 		goto sp_done ;
 	case SMT_P1033 :			/* time stamp */
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 328397c..adfab3f 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -413,7 +413,9 @@
 
 	/* 5. Gobi 2000 and 3000 devices */
 	{QMI_GOBI_DEVICE(0x413c, 0x8186)},	/* Dell Gobi 2000 Modem device (N0218, VU936) */
+	{QMI_GOBI_DEVICE(0x413c, 0x8194)},	/* Dell Gobi 3000 Composite */
 	{QMI_GOBI_DEVICE(0x05c6, 0x920b)},	/* Generic Gobi 2000 Modem device */
+	{QMI_GOBI_DEVICE(0x05c6, 0x920d)},	/* Gobi 3000 Composite */
 	{QMI_GOBI_DEVICE(0x05c6, 0x9225)},	/* Sony Gobi 2000 Modem device (N0279, VU730) */
 	{QMI_GOBI_DEVICE(0x05c6, 0x9245)},	/* Samsung Gobi 2000 Modem device (VL176) */
 	{QMI_GOBI_DEVICE(0x03f0, 0x251d)},	/* HP Gobi 2000 Modem device (VP412) */
@@ -441,6 +443,8 @@
 	{QMI_GOBI_DEVICE(0x1199, 0x9015)},	/* Sierra Wireless Gobi 3000 Modem device */
 	{QMI_GOBI_DEVICE(0x1199, 0x9019)},	/* Sierra Wireless Gobi 3000 Modem device */
 	{QMI_GOBI_DEVICE(0x1199, 0x901b)},	/* Sierra Wireless MC7770 */
+	{QMI_GOBI_DEVICE(0x12d1, 0x14f1)},	/* Sony Gobi 3000 Composite */
+	{QMI_GOBI_DEVICE(0x1410, 0xa021)},	/* Foxconn Gobi 3000 Modem device (Novatel E396) */
 
 	{ }					/* END */
 };
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 8531c1c..fd4b26d 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -1573,7 +1573,7 @@
 				netif_device_present(dev->net) &&
 				!timer_pending(&dev->delay) &&
 				!test_bit(EVENT_RX_HALT, &dev->flags))
-					rx_alloc_submit(dev, GFP_KERNEL);
+					rx_alloc_submit(dev, GFP_NOIO);
 
 			if (!(dev->txq.qlen >= TX_QLEN(dev)))
 				netif_tx_wake_all_queues(dev->net);
diff --git a/drivers/net/wireless/ath/ath5k/eeprom.c b/drivers/net/wireless/ath/ath5k/eeprom.c
index 4026c90..b7e0258 100644
--- a/drivers/net/wireless/ath/ath5k/eeprom.c
+++ b/drivers/net/wireless/ath/ath5k/eeprom.c
@@ -1482,7 +1482,7 @@
 	case AR5K_EEPROM_MODE_11A:
 		offset += AR5K_EEPROM_TARGET_PWR_OFF_11A(ee->ee_version);
 		rate_pcal_info = ee->ee_rate_tpwr_a;
-		ee->ee_rate_target_pwr_num[mode] = AR5K_EEPROM_N_5GHZ_CHAN;
+		ee->ee_rate_target_pwr_num[mode] = AR5K_EEPROM_N_5GHZ_RATE_CHAN;
 		break;
 	case AR5K_EEPROM_MODE_11B:
 		offset += AR5K_EEPROM_TARGET_PWR_OFF_11B(ee->ee_version);
diff --git a/drivers/net/wireless/ath/ath5k/eeprom.h b/drivers/net/wireless/ath/ath5k/eeprom.h
index dc2bcfe..94a9bbe 100644
--- a/drivers/net/wireless/ath/ath5k/eeprom.h
+++ b/drivers/net/wireless/ath/ath5k/eeprom.h
@@ -182,6 +182,7 @@
 #define AR5K_EEPROM_EEP_DELTA		10
 #define AR5K_EEPROM_N_MODES		3
 #define AR5K_EEPROM_N_5GHZ_CHAN		10
+#define AR5K_EEPROM_N_5GHZ_RATE_CHAN	8
 #define AR5K_EEPROM_N_2GHZ_CHAN		3
 #define AR5K_EEPROM_N_2GHZ_CHAN_2413	4
 #define	AR5K_EEPROM_N_2GHZ_CHAN_MAX	4
diff --git a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c
index 192ad5c..a5edebe 100644
--- a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c
+++ b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c
@@ -1233,6 +1233,9 @@
 	/* dpc will not be rescheduled */
 	wl->resched = false;
 
+	/* inform publicly that interface is down */
+	wl->pub->up = false;
+
 	return 0;
 }
 
diff --git a/drivers/net/wireless/ipw2x00/ipw2100.c b/drivers/net/wireless/ipw2x00/ipw2100.c
index 95aa8e1..83324b3 100644
--- a/drivers/net/wireless/ipw2x00/ipw2100.c
+++ b/drivers/net/wireless/ipw2x00/ipw2100.c
@@ -2042,7 +2042,8 @@
 		return;
 	}
 	len = ETH_ALEN;
-	ipw2100_get_ordinal(priv, IPW_ORD_STAT_ASSN_AP_BSSID, &bssid, &len);
+	ret = ipw2100_get_ordinal(priv, IPW_ORD_STAT_ASSN_AP_BSSID, bssid,
+				  &len);
 	if (ret) {
 		IPW_DEBUG_INFO("failed querying ordinals at line %d\n",
 			       __LINE__);
diff --git a/drivers/net/wireless/iwlwifi/dvm/debugfs.c b/drivers/net/wireless/iwlwifi/dvm/debugfs.c
index 46782f1..a47b306 100644
--- a/drivers/net/wireless/iwlwifi/dvm/debugfs.c
+++ b/drivers/net/wireless/iwlwifi/dvm/debugfs.c
@@ -124,6 +124,9 @@
 	const struct fw_img *img;
 	size_t bufsz;
 
+	if (!iwl_is_ready_rf(priv))
+		return -EAGAIN;
+
 	/* default is to dump the entire data segment */
 	if (!priv->dbgfs_sram_offset && !priv->dbgfs_sram_len) {
 		priv->dbgfs_sram_offset = 0x800000;
diff --git a/drivers/net/wireless/iwlwifi/pcie/internal.h b/drivers/net/wireless/iwlwifi/pcie/internal.h
index d9694c5..4ffc18d 100644
--- a/drivers/net/wireless/iwlwifi/pcie/internal.h
+++ b/drivers/net/wireless/iwlwifi/pcie/internal.h
@@ -350,7 +350,7 @@
 /*****************************************************
 * Error handling
 ******************************************************/
-int iwl_dump_fh(struct iwl_trans *trans, char **buf, bool display);
+int iwl_dump_fh(struct iwl_trans *trans, char **buf);
 void iwl_dump_csr(struct iwl_trans *trans);
 
 /*****************************************************
diff --git a/drivers/net/wireless/iwlwifi/pcie/rx.c b/drivers/net/wireless/iwlwifi/pcie/rx.c
index 39a6ca1..d1a61ba 100644
--- a/drivers/net/wireless/iwlwifi/pcie/rx.c
+++ b/drivers/net/wireless/iwlwifi/pcie/rx.c
@@ -555,7 +555,7 @@
 	}
 
 	iwl_dump_csr(trans);
-	iwl_dump_fh(trans, NULL, false);
+	iwl_dump_fh(trans, NULL);
 
 	iwl_op_mode_nic_error(trans->op_mode);
 }
diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c
index 939c2f7..1e86ea2 100644
--- a/drivers/net/wireless/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/iwlwifi/pcie/trans.c
@@ -1649,13 +1649,9 @@
 #undef IWL_CMD
 }
 
-int iwl_dump_fh(struct iwl_trans *trans, char **buf, bool display)
+int iwl_dump_fh(struct iwl_trans *trans, char **buf)
 {
 	int i;
-#ifdef CONFIG_IWLWIFI_DEBUG
-	int pos = 0;
-	size_t bufsz = 0;
-#endif
 	static const u32 fh_tbl[] = {
 		FH_RSCSR_CHNL0_STTS_WPTR_REG,
 		FH_RSCSR_CHNL0_RBDCB_BASE_REG,
@@ -1667,29 +1663,35 @@
 		FH_TSSR_TX_STATUS_REG,
 		FH_TSSR_TX_ERROR_REG
 	};
-#ifdef CONFIG_IWLWIFI_DEBUG
-	if (display) {
-		bufsz = ARRAY_SIZE(fh_tbl) * 48 + 40;
+
+#ifdef CONFIG_IWLWIFI_DEBUGFS
+	if (buf) {
+		int pos = 0;
+		size_t bufsz = ARRAY_SIZE(fh_tbl) * 48 + 40;
+
 		*buf = kmalloc(bufsz, GFP_KERNEL);
 		if (!*buf)
 			return -ENOMEM;
+
 		pos += scnprintf(*buf + pos, bufsz - pos,
 				"FH register values:\n");
-		for (i = 0; i < ARRAY_SIZE(fh_tbl); i++) {
+
+		for (i = 0; i < ARRAY_SIZE(fh_tbl); i++)
 			pos += scnprintf(*buf + pos, bufsz - pos,
 				"  %34s: 0X%08x\n",
 				get_fh_string(fh_tbl[i]),
 				iwl_read_direct32(trans, fh_tbl[i]));
-		}
+
 		return pos;
 	}
 #endif
+
 	IWL_ERR(trans, "FH register values:\n");
-	for (i = 0; i <  ARRAY_SIZE(fh_tbl); i++) {
+	for (i = 0; i <  ARRAY_SIZE(fh_tbl); i++)
 		IWL_ERR(trans, "  %34s: 0X%08x\n",
 			get_fh_string(fh_tbl[i]),
 			iwl_read_direct32(trans, fh_tbl[i]));
-	}
+
 	return 0;
 }
 
@@ -1982,11 +1984,11 @@
 				     size_t count, loff_t *ppos)
 {
 	struct iwl_trans *trans = file->private_data;
-	char *buf;
+	char *buf = NULL;
 	int pos = 0;
 	ssize_t ret = -EFAULT;
 
-	ret = pos = iwl_dump_fh(trans, &buf, true);
+	ret = pos = iwl_dump_fh(trans, &buf);
 	if (buf) {
 		ret = simple_read_from_buffer(user_buf,
 					      count, ppos, buf, pos);
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 3089990..650f79a 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -57,8 +57,7 @@
 static const struct ethtool_ops xennet_ethtool_ops;
 
 struct netfront_cb {
-	struct page *page;
-	unsigned offset;
+	int pull_to;
 };
 
 #define NETFRONT_SKB_CB(skb)	((struct netfront_cb *)((skb)->cb))
@@ -867,15 +866,9 @@
 	struct sk_buff *skb;
 
 	while ((skb = __skb_dequeue(rxq)) != NULL) {
-		struct page *page = NETFRONT_SKB_CB(skb)->page;
-		void *vaddr = page_address(page);
-		unsigned offset = NETFRONT_SKB_CB(skb)->offset;
+		int pull_to = NETFRONT_SKB_CB(skb)->pull_to;
 
-		memcpy(skb->data, vaddr + offset,
-		       skb_headlen(skb));
-
-		if (page != skb_frag_page(&skb_shinfo(skb)->frags[0]))
-			__free_page(page);
+		__pskb_pull_tail(skb, pull_to - skb_headlen(skb));
 
 		/* Ethernet work: Delayed to here as it peeks the header. */
 		skb->protocol = eth_type_trans(skb, dev);
@@ -913,7 +906,6 @@
 	struct sk_buff_head errq;
 	struct sk_buff_head tmpq;
 	unsigned long flags;
-	unsigned int len;
 	int err;
 
 	spin_lock(&np->rx_lock);
@@ -955,24 +947,13 @@
 			}
 		}
 
-		NETFRONT_SKB_CB(skb)->page =
-			skb_frag_page(&skb_shinfo(skb)->frags[0]);
-		NETFRONT_SKB_CB(skb)->offset = rx->offset;
+		NETFRONT_SKB_CB(skb)->pull_to = rx->status;
+		if (NETFRONT_SKB_CB(skb)->pull_to > RX_COPY_THRESHOLD)
+			NETFRONT_SKB_CB(skb)->pull_to = RX_COPY_THRESHOLD;
 
-		len = rx->status;
-		if (len > RX_COPY_THRESHOLD)
-			len = RX_COPY_THRESHOLD;
-		skb_put(skb, len);
-
-		if (rx->status > len) {
-			skb_shinfo(skb)->frags[0].page_offset =
-				rx->offset + len;
-			skb_frag_size_set(&skb_shinfo(skb)->frags[0], rx->status - len);
-			skb->data_len = rx->status - len;
-		} else {
-			__skb_fill_page_desc(skb, 0, NULL, 0, 0);
-			skb_shinfo(skb)->nr_frags = 0;
-		}
+		skb_shinfo(skb)->frags[0].page_offset = rx->offset;
+		skb_frag_size_set(&skb_shinfo(skb)->frags[0], rx->status);
+		skb->data_len = rx->status;
 
 		i = xennet_fill_frags(np, skb, &tmpq);
 
@@ -999,7 +980,7 @@
 		 * receive throughout using the standard receive
 		 * buffer size was cut by 25%(!!!).
 		 */
-		skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len);
+		skb->truesize += skb->data_len - RX_COPY_THRESHOLD;
 		skb->len += skb->data_len;
 
 		if (rx->flags & XEN_NETRXF_csum_blank)
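
The reworked receive path above no longer copies data by hand: it records how much of each packet should live in the linear area as pull_to, caps that at RX_COPY_THRESHOLD, and later lets __pskb_pull_tail() move exactly that much out of the first fragment. A minimal userspace sketch of the clamping arithmetic (the threshold value and helper name below are illustrative, not taken from the driver):

#include <stdio.h>

#define RX_COPY_THRESHOLD 256   /* assumed value, for illustration only */

/* Decide how many bytes of a received packet end up in the linear head. */
static unsigned int clamp_pull_to(unsigned int status)
{
	return status > RX_COPY_THRESHOLD ? RX_COPY_THRESHOLD : status;
}

int main(void)
{
	unsigned int status = 1500;              /* total bytes reported by the backend */
	unsigned int pull_to = clamp_pull_to(status);

	printf("linear head: %u bytes, left in frag 0: %u bytes\n",
	       pull_to, status - pull_to);
	return 0;
}
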
diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
index b8ef8dd..8aa73fa 100644
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -451,14 +451,9 @@
 {
 	struct oprofile_cpu_buffer *b =
 		container_of(work, struct oprofile_cpu_buffer, work.work);
-	if (b->cpu != smp_processor_id()) {
-		printk(KERN_DEBUG "WQ on CPU%d, prefer CPU%d\n",
-		       smp_processor_id(), b->cpu);
-
-		if (!cpu_online(b->cpu)) {
-			cancel_delayed_work(&b->work);
-			return;
-		}
+	if (b->cpu != smp_processor_id() && !cpu_online(b->cpu)) {
+		cancel_delayed_work(&b->work);
+		return;
 	}
 	sync_buffer(b->cpu);
 
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 5270f1a..d6fd6b6 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -280,8 +280,12 @@
 {
 	struct drv_dev_and_id *ddi = _ddi;
 	struct device *dev = &ddi->dev->dev;
+	struct device *parent = dev->parent;
 	int rc;
 
+	/* The parent bridge must be in active state when probing */
+	if (parent)
+		pm_runtime_get_sync(parent);
 	/* Unbound PCI devices are always set to disabled and suspended.
 	 * During probe, the device is set to enabled and active and the
 	 * usage count is incremented.  If the driver supports runtime PM,
@@ -298,6 +302,8 @@
 		pm_runtime_set_suspended(dev);
 		pm_runtime_put_noidle(dev);
 	}
+	if (parent)
+		pm_runtime_put(parent);
 	return rc;
 }
 
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 6869009..02d107b 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -458,6 +458,40 @@
 }
 struct device_attribute vga_attr = __ATTR_RO(boot_vga);
 
+static void
+pci_config_pm_runtime_get(struct pci_dev *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device *parent = dev->parent;
+
+	if (parent)
+		pm_runtime_get_sync(parent);
+	pm_runtime_get_noresume(dev);
+	/*
+	 * pdev->current_state is set to PCI_D3cold during suspending,
+	 * so wait until suspending completes
+	 */
+	pm_runtime_barrier(dev);
+	/*
+	 * Only need to resume devices in D3cold, because config
+	 * registers are still accessible for devices suspended but
+	 * not in D3cold.
+	 */
+	if (pdev->current_state == PCI_D3cold)
+		pm_runtime_resume(dev);
+}
+
+static void
+pci_config_pm_runtime_put(struct pci_dev *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device *parent = dev->parent;
+
+	pm_runtime_put(dev);
+	if (parent)
+		pm_runtime_put_sync(parent);
+}
+
 static ssize_t
 pci_read_config(struct file *filp, struct kobject *kobj,
 		struct bin_attribute *bin_attr,
@@ -484,6 +518,8 @@
 		size = count;
 	}
 
+	pci_config_pm_runtime_get(dev);
+
 	if ((off & 1) && size) {
 		u8 val;
 		pci_user_read_config_byte(dev, off, &val);
@@ -529,6 +565,8 @@
 		--size;
 	}
 
+	pci_config_pm_runtime_put(dev);
+
 	return count;
 }
 
@@ -549,6 +587,8 @@
 		count = size;
 	}
 	
+	pci_config_pm_runtime_get(dev);
+
 	if ((off & 1) && size) {
 		pci_user_write_config_byte(dev, off, data[off - init_off]);
 		off++;
@@ -587,6 +627,8 @@
 		--size;
 	}
 
+	pci_config_pm_runtime_put(dev);
+
 	return count;
 }
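
The pci_config_pm_runtime_get()/_put() helpers above bracket each sysfs config-space access so a device parked in D3cold (and its parent bridge) is brought back up before its registers are touched. A rough userspace analogue of that bracket pattern, with a plain reference count standing in for the runtime-PM framework; every name here is invented for the example:

#include <stdio.h>

struct fake_dev {
	int usage;          /* stands in for the runtime-PM usage count */
	int powered;        /* 1 = accessible, 0 = "D3cold"             */
};

static void dev_get(struct fake_dev *d)
{
	if (d->usage++ == 0)
		d->powered = 1;          /* resume before the first user touches it */
}

static void dev_put(struct fake_dev *d)
{
	if (--d->usage == 0)
		d->powered = 0;          /* free to power down again when idle */
}

static unsigned int config_read(struct fake_dev *d)
{
	unsigned int val;

	dev_get(d);                      /* bracket the access, like ..._runtime_get() */
	val = d->powered ? 0x8086 : 0xffffffff;
	dev_put(d);                      /* like ..._runtime_put() */
	return val;
}

int main(void)
{
	struct fake_dev dev = { 0, 0 };

	printf("config word: 0x%x\n", config_read(&dev));
	return 0;
}
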
 
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index f3ea977..ab4bf5a 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1941,6 +1941,7 @@
 	dev->pm_cap = pm;
 	dev->d3_delay = PCI_PM_D3_WAIT;
 	dev->d3cold_delay = PCI_PM_D3COLD_WAIT;
+	dev->d3cold_allowed = true;
 
 	dev->d1_support = false;
 	dev->d2_support = false;
diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c
index 3a7eefc..e76b447 100644
--- a/drivers/pci/pcie/portdrv_pci.c
+++ b/drivers/pci/pcie/portdrv_pci.c
@@ -140,9 +140,17 @@
 {
 	return 0;
 }
+
+static int pcie_port_runtime_idle(struct device *dev)
+{
+	/* Delay for a short while to prevent too frequent suspend/resume */
+	pm_schedule_suspend(dev, 10);
+	return -EBUSY;
+}
 #else
 #define pcie_port_runtime_suspend	NULL
 #define pcie_port_runtime_resume	NULL
+#define pcie_port_runtime_idle		NULL
 #endif
 
 static const struct dev_pm_ops pcie_portdrv_pm_ops = {
@@ -155,6 +163,7 @@
 	.resume_noirq	= pcie_port_resume_noirq,
 	.runtime_suspend = pcie_port_runtime_suspend,
 	.runtime_resume = pcie_port_runtime_resume,
+	.runtime_idle	= pcie_port_runtime_idle,
 };
 
 #define PCIE_PORTDRV_PM_OPS	(&pcie_portdrv_pm_ops)
@@ -200,6 +209,11 @@
 		return status;
 
 	pci_save_state(dev);
+	/*
+	 * D3cold may not work properly on some PCIe ports, so disable
+	 * it by default.
+	 */
+	dev->d3cold_allowed = false;
 	if (!pci_match_id(port_runtime_pm_black_list, dev))
 		pm_runtime_put_noidle(&dev->dev);
 
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 6c143b4..9f8a6b7 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -144,15 +144,13 @@
 	case PCI_BASE_ADDRESS_MEM_TYPE_32:
 		break;
 	case PCI_BASE_ADDRESS_MEM_TYPE_1M:
-		dev_info(&dev->dev, "1M mem BAR treated as 32-bit BAR\n");
+		/* 1M mem BAR treated as 32-bit BAR */
 		break;
 	case PCI_BASE_ADDRESS_MEM_TYPE_64:
 		flags |= IORESOURCE_MEM_64;
 		break;
 	default:
-		dev_warn(&dev->dev,
-			 "mem unknown type %x treated as 32-bit BAR\n",
-			 mem_type);
+		/* mem unknown type treated as 32-bit BAR */
 		break;
 	}
 	return flags;
@@ -173,9 +171,11 @@
 	u32 l, sz, mask;
 	u16 orig_cmd;
 	struct pci_bus_region region;
+	bool bar_too_big = false, bar_disabled = false;
 
 	mask = type ? PCI_ROM_ADDRESS_MASK : ~0;
 
+	/* No printks while decoding is disabled! */
 	if (!dev->mmio_always_on) {
 		pci_read_config_word(dev, PCI_COMMAND, &orig_cmd);
 		pci_write_config_word(dev, PCI_COMMAND,
@@ -240,8 +240,7 @@
 			goto fail;
 
 		if ((sizeof(resource_size_t) < 8) && (sz64 > 0x100000000ULL)) {
-			dev_err(&dev->dev, "reg %x: can't handle 64-bit BAR\n",
-				pos);
+			bar_too_big = true;
 			goto fail;
 		}
 
@@ -252,12 +251,11 @@
 			region.start = 0;
 			region.end = sz64;
 			pcibios_bus_to_resource(dev, res, &region);
+			bar_disabled = true;
 		} else {
 			region.start = l64;
 			region.end = l64 + sz64;
 			pcibios_bus_to_resource(dev, res, &region);
-			dev_printk(KERN_DEBUG, &dev->dev, "reg %x: %pR\n",
-				   pos, res);
 		}
 	} else {
 		sz = pci_size(l, sz, mask);
@@ -268,18 +266,23 @@
 		region.start = l;
 		region.end = l + sz;
 		pcibios_bus_to_resource(dev, res, &region);
-
-		dev_printk(KERN_DEBUG, &dev->dev, "reg %x: %pR\n", pos, res);
 	}
 
- out:
+	goto out;
+
+
+fail:
+	res->flags = 0;
+out:
 	if (!dev->mmio_always_on)
 		pci_write_config_word(dev, PCI_COMMAND, orig_cmd);
 
+	if (bar_too_big)
+		dev_err(&dev->dev, "reg %x: can't handle 64-bit BAR\n", pos);
+	if (res->flags && !bar_disabled)
+		dev_printk(KERN_DEBUG, &dev->dev, "reg %x: %pR\n", pos, res);
+
 	return (res->flags & IORESOURCE_MEM_64) ? 1 : 0;
- fail:
-	res->flags = 0;
-	goto out;
 }
 
 static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom)
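
The __pci_read_base() rework above is careful not to print while BAR decoding is disabled: it only records bar_too_big/bar_disabled inside that window and emits the messages once the command register has been restored. The same note-now-report-later shape in a self-contained sketch (the quiesce/restore helpers and the size check are stand-ins, not PCI core code):

#include <stdbool.h>
#include <stdio.h>

static bool decode_disabled;

static void quiesce(void) { decode_disabled = true;  }
static void restore(void) { decode_disabled = false; }

static void probe_resource(unsigned long long size)
{
	bool too_big = false;

	quiesce();                       /* no printing allowed in here */
	if (size > 0xffffffffULL && sizeof(void *) < 8)
		too_big = true;          /* just remember the condition */
	restore();

	if (too_big)                     /* safe to report now */
		fprintf(stderr, "can't handle 64-bit BAR on this build\n");
	else
		printf("resource of %llu bytes probed\n", size);
}

int main(void)
{
	probe_resource(1 << 20);
	return 0;
}
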
diff --git a/drivers/rtc/rtc-at91sam9.c b/drivers/rtc/rtc-at91sam9.c
index 8318689..1dd61f4 100644
--- a/drivers/rtc/rtc-at91sam9.c
+++ b/drivers/rtc/rtc-at91sam9.c
@@ -58,6 +58,7 @@
 	struct rtc_device	*rtcdev;
 	u32			imr;
 	void __iomem		*gpbr;
+	int			irq;
 };
 
 #define rtt_readl(rtc, field) \
@@ -292,7 +293,7 @@
 {
 	struct resource	*r, *r_gpbr;
 	struct sam9_rtc	*rtc;
-	int		ret;
+	int		ret, irq;
 	u32		mr;
 
 	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -302,10 +303,18 @@
 		return -ENODEV;
 	}
 
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(&pdev->dev, "failed to get interrupt resource\n");
+		return irq;
+	}
+
 	rtc = kzalloc(sizeof *rtc, GFP_KERNEL);
 	if (!rtc)
 		return -ENOMEM;
 
+	rtc->irq = irq;
+
 	/* platform setup code should have handled this; sigh */
 	if (!device_can_wakeup(&pdev->dev))
 		device_init_wakeup(&pdev->dev, 1);
@@ -345,11 +354,10 @@
 	}
 
 	/* register irq handler after we know what name we'll use */
-	ret = request_irq(AT91_ID_SYS, at91_rtc_interrupt,
-				IRQF_SHARED,
+	ret = request_irq(rtc->irq, at91_rtc_interrupt, IRQF_SHARED,
 				dev_name(&rtc->rtcdev->dev), rtc);
 	if (ret) {
-		dev_dbg(&pdev->dev, "can't share IRQ %d?\n", AT91_ID_SYS);
+		dev_dbg(&pdev->dev, "can't share IRQ %d?\n", rtc->irq);
 		rtc_device_unregister(rtc->rtcdev);
 		goto fail_register;
 	}
@@ -386,7 +394,7 @@
 
 	/* disable all interrupts */
 	rtt_writel(rtc, MR, mr & ~(AT91_RTT_ALMIEN | AT91_RTT_RTTINCIEN));
-	free_irq(AT91_ID_SYS, rtc);
+	free_irq(rtc->irq, rtc);
 
 	rtc_device_unregister(rtc->rtcdev);
 
@@ -423,7 +431,7 @@
 	rtc->imr = mr & (AT91_RTT_ALMIEN | AT91_RTT_RTTINCIEN);
 	if (rtc->imr) {
 		if (device_may_wakeup(&pdev->dev) && (mr & AT91_RTT_ALMIEN)) {
-			enable_irq_wake(AT91_ID_SYS);
+			enable_irq_wake(rtc->irq);
 			/* don't let RTTINC cause wakeups */
 			if (mr & AT91_RTT_RTTINCIEN)
 				rtt_writel(rtc, MR, mr & ~AT91_RTT_RTTINCIEN);
@@ -441,7 +449,7 @@
 
 	if (rtc->imr) {
 		if (device_may_wakeup(&pdev->dev))
-			disable_irq_wake(AT91_ID_SYS);
+			disable_irq_wake(rtc->irq);
 		mr = rtt_readl(rtc, MR);
 		rtt_writel(rtc, MR, mr | rtc->imr);
 	}
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 40a826a..2fb2b9e 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -3804,7 +3804,7 @@
 	case BIODASDSYMMIO:
 		return dasd_symm_io(device, argp);
 	default:
-		return -ENOIOCTLCMD;
+		return -ENOTTY;
 	}
 }
 
diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c
index cceae70..654c692 100644
--- a/drivers/s390/block/dasd_ioctl.c
+++ b/drivers/s390/block/dasd_ioctl.c
@@ -498,12 +498,9 @@
 		break;
 	default:
 		/* if the discipline has an ioctl method try it. */
-		if (base->discipline->ioctl) {
+		rc = -ENOTTY;
+		if (base->discipline->ioctl)
 			rc = base->discipline->ioctl(block, cmd, argp);
-			if (rc == -ENOIOCTLCMD)
-				rc = -EINVAL;
-		} else
-			rc = -EINVAL;
 	}
 	dasd_put_device(base);
 	return rc;
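
Both DASD hunks above settle on -ENOTTY for an ioctl the driver does not recognise, instead of -ENOIOCTLCMD (an in-kernel value that should not leak to user space) or -EINVAL. A trimmed-down dispatcher showing that convention; the command numbers are arbitrary:

#include <errno.h>
#include <stdio.h>

#define MYDEV_RESET  0x1001
#define MYDEV_STATUS 0x1002

static int mydev_ioctl(unsigned int cmd)
{
	switch (cmd) {
	case MYDEV_RESET:
		return 0;
	case MYDEV_STATUS:
		return 0;
	default:
		return -ENOTTY;     /* "inappropriate ioctl for device" */
	}
}

int main(void)
{
	printf("unknown cmd -> %d (-ENOTTY is %d)\n",
	       mydev_ioctl(0xdead), -ENOTTY);
	return 0;
}
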
diff --git a/drivers/video/auo_k190x.c b/drivers/video/auo_k190x.c
index 77da6a2..c03ecdd 100644
--- a/drivers/video/auo_k190x.c
+++ b/drivers/video/auo_k190x.c
@@ -987,7 +987,6 @@
 	fb_dealloc_cmap(&info->cmap);
 err_cmap:
 	fb_deferred_io_cleanup(info);
-	kfree(info->fbdefio);
 err_defio:
 	vfree((void *)info->screen_base);
 err_irq:
@@ -1022,7 +1021,6 @@
 	fb_dealloc_cmap(&info->cmap);
 
 	fb_deferred_io_cleanup(info);
-	kfree(info->fbdefio);
 
 	vfree((void *)info->screen_base);
 
diff --git a/drivers/video/console/bitblit.c b/drivers/video/console/bitblit.c
index 28b1a83..61b182b 100644
--- a/drivers/video/console/bitblit.c
+++ b/drivers/video/console/bitblit.c
@@ -162,7 +162,7 @@
 	image.depth = 1;
 
 	if (attribute) {
-		buf = kmalloc(cellsize, GFP_KERNEL);
+		buf = kmalloc(cellsize, GFP_ATOMIC);
 		if (!buf)
 			return;
 	}
diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
index 88e9204..fdefa8f 100644
--- a/drivers/video/console/fbcon.c
+++ b/drivers/video/console/fbcon.c
@@ -449,7 +449,7 @@
 
 	while ((options = strsep(&this_opt, ",")) != NULL) {
 		if (!strncmp(options, "font:", 5))
-			strcpy(fontname, options + 5);
+			strlcpy(fontname, options + 5, sizeof(fontname));
 		
 		if (!strncmp(options, "scrollback:", 11)) {
 			options += 11;
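
The fbcon change swaps strcpy() for strlcpy() so an over-long "font:" option can no longer overrun the fixed-size fontname[] buffer. A portable userspace illustration of the same bound, using snprintf() because strlcpy() is not available in every libc; the buffer size is arbitrary:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char fontname[8];
	const char *opt = "font:averyverylongfontname";

	/* Bounded copy: at most sizeof(fontname)-1 characters plus the NUL. */
	snprintf(fontname, sizeof(fontname), "%s", opt + 5);

	printf("stored \"%s\" (%zu of %zu bytes used)\n",
	       fontname, strlen(fontname) + 1, sizeof(fontname));
	return 0;
}
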
diff --git a/drivers/video/mb862xx/mb862xxfbdrv.c b/drivers/video/mb862xx/mb862xxfbdrv.c
index 00ce1f3..57d940b 100644
--- a/drivers/video/mb862xx/mb862xxfbdrv.c
+++ b/drivers/video/mb862xx/mb862xxfbdrv.c
@@ -328,6 +328,8 @@
 	case MB862XX_L1_SET_CFG:
 		if (copy_from_user(l1_cfg, argp, sizeof(*l1_cfg)))
 			return -EFAULT;
+		if (l1_cfg->dh == 0 || l1_cfg->dw == 0)
+			return -EINVAL;
 		if ((l1_cfg->sw >= l1_cfg->dw) && (l1_cfg->sh >= l1_cfg->dh)) {
 			/* downscaling */
 			outreg(cap, GC_CAP_CSC,
diff --git a/drivers/video/omap2/dss/sdi.c b/drivers/video/omap2/dss/sdi.c
index 5d31699..f43bfe1 100644
--- a/drivers/video/omap2/dss/sdi.c
+++ b/drivers/video/omap2/dss/sdi.c
@@ -105,6 +105,20 @@
 
 	sdi_config_lcd_manager(dssdev);
 
+	/*
+	 * LCLK and PCLK divisors are located in shadow registers, and we
+	 * normally write them to DISPC registers when enabling the output.
+	 * However, SDI uses pck-free as source clock for its PLL, and pck-free
+	 * is affected by the divisors. And as we need the PLL before enabling
+	 * the output, we need to write the divisors early.
+	 *
+	 * It seems just writing to the DISPC register is enough, and we don't
+	 * need to care about the shadow register mechanism for pck-free. The
+	 * exact reason for this is unknown.
+	 */
+	dispc_mgr_set_clock_div(dssdev->manager->id,
+			&sdi.mgr_config.clock_info);
+
 	dss_sdi_init(dssdev->phy.sdi.datapairs);
 	r = dss_sdi_enable();
 	if (r)
diff --git a/drivers/video/omap2/omapfb/omapfb-main.c b/drivers/video/omap2/omapfb/omapfb-main.c
index 08ec1a7..fc671d3 100644
--- a/drivers/video/omap2/omapfb/omapfb-main.c
+++ b/drivers/video/omap2/omapfb/omapfb-main.c
@@ -1192,7 +1192,7 @@
 			break;
 
 		if (regno < 16) {
-			u16 pal;
+			u32 pal;
 			pal = ((red >> (16 - var->red.length)) <<
 					var->red.offset) |
 				((green >> (16 - var->green.length)) <<
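
The omapfb fix widens the palette temporary from u16 to u32: for RGB565 the shifted components still fit in 16 bits, but with a 24/32-bpp layout where red.offset is 16 the red component lands above bit 15 and a u16 silently drops it. A tiny demonstration of that truncation (the RGB888-style field layout is just an example):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t red = 0xffff;          /* 16-bit colour component from the cmap */
	unsigned int red_offset = 16;   /* e.g. RGB888: red in bits 23..16 */
	unsigned int red_length = 8;

	uint16_t pal16 = (uint16_t)((red >> (16 - red_length)) << red_offset);
	uint32_t pal32 = (uint32_t)(red >> (16 - red_length)) << red_offset;

	printf("u16 temporary: 0x%04x (red lost)\n", pal16);
	printf("u32 temporary: 0x%08x\n", pal32);
	return 0;
}
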
diff --git a/drivers/watchdog/da9052_wdt.c b/drivers/watchdog/da9052_wdt.c
index 3f75129..f7abbae 100644
--- a/drivers/watchdog/da9052_wdt.c
+++ b/drivers/watchdog/da9052_wdt.c
@@ -21,7 +21,6 @@
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/jiffies.h>
-#include <linux/delay.h>
 
 #include <linux/mfd/da9052/reg.h>
 #include <linux/mfd/da9052/da9052.h>
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 1afb4fb..4d51948 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -232,7 +232,7 @@
 		return ret;
 
 	if (hwdev && hwdev->coherent_dma_mask)
-		dma_mask = hwdev->coherent_dma_mask;
+		dma_mask = dma_alloc_coherent_mask(hwdev, flags);
 
 	phys = virt_to_phys(ret);
 	dev_addr = xen_phys_to_bus(phys);
diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c
index 097e536..0334272 100644
--- a/drivers/xen/xen-pciback/pci_stub.c
+++ b/drivers/xen/xen-pciback/pci_stub.c
@@ -353,16 +353,16 @@
 	if (err)
 		goto config_release;
 
-	dev_dbg(&dev->dev, "reseting (FLR, D3, etc) the device\n");
-	__pci_reset_function_locked(dev);
-
 	/* We need the device active to save the state. */
 	dev_dbg(&dev->dev, "save state of device\n");
 	pci_save_state(dev);
 	dev_data->pci_saved_state = pci_store_saved_state(dev);
 	if (!dev_data->pci_saved_state)
 		dev_err(&dev->dev, "Could not store PCI conf saved state!\n");
-
+	else {
+		dev_dbg(&dev->dev, "resetting (FLR, D3, etc) the device\n");
+		__pci_reset_function_locked(dev);
+	}
 	/* Now disable the device (this also ensures some private device
 	 * data is setup before we export)
 	 */
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index a256f3b..ff6475f 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1438,10 +1438,10 @@
 	ret = extent_from_logical(fs_info, logical, path,
 					&found_key);
 	btrfs_release_path(path);
-	if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK)
-		ret = -EINVAL;
 	if (ret < 0)
 		return ret;
+	if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK)
+		return -EINVAL;
 
 	extent_item_pos = logical - found_key.objectid;
 	ret = iterate_extent_inodes(fs_info, found_key.objectid,
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 86eff48..43d1c5a 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -818,6 +818,7 @@
 	btrfs_compress_op[idx]->free_workspace(workspace);
 	atomic_dec(alloc_workspace);
 wake:
+	smp_mb();
 	if (waitqueue_active(workspace_wait))
 		wake_up(workspace_wait);
 }
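
The added smp_mb() follows the usual waitqueue_active() idiom: the waker has to make its state update visible before it checks whether anyone is waiting, otherwise the check can be ordered ahead of the update and a sleeper is missed. A userspace sketch of the same ordering with C11 atomics, where the fences play the role of smp_mb(); the flag names are invented:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool work_done = false;
static atomic_bool waiter_present = false;

/* Waker side: publish the state, then look for waiters. */
static void finish_work(void)
{
	atomic_store_explicit(&work_done, true, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);   /* counterpart of smp_mb() */
	if (atomic_load_explicit(&waiter_present, memory_order_relaxed))
		puts("wake_up() would be called here");
}

/* Waiter side: announce ourselves, then re-check the condition. */
static void wait_for_work(void)
{
	atomic_store_explicit(&waiter_present, true, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);
	if (!atomic_load_explicit(&work_done, memory_order_relaxed))
		puts("would sleep here");
}

int main(void)
{
	wait_for_work();
	finish_work();
	return 0;
}
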
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 9d7621f..6d183f6 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -421,12 +421,6 @@
 	spin_unlock(&fs_info->tree_mod_seq_lock);
 
 	/*
-	 * we removed the lowest blocker from the blocker list, so there may be
-	 * more processible delayed refs.
-	 */
-	wake_up(&fs_info->tree_mod_seq_wait);
-
-	/*
 	 * anything that's lower than the lowest existing (read: blocked)
 	 * sequence number can be removed from the tree.
 	 */
@@ -631,6 +625,9 @@
 	u32 nritems;
 	int ret;
 
+	if (btrfs_header_level(eb) == 0)
+		return;
+
 	nritems = btrfs_header_nritems(eb);
 	for (i = nritems - 1; i >= 0; i--) {
 		ret = tree_mod_log_insert_key_locked(fs_info, eb, i,
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4bab807..0d195b5 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1252,7 +1252,6 @@
 	atomic_t tree_mod_seq;
 	struct list_head tree_mod_seq_list;
 	struct seq_list tree_mod_seq_elem;
-	wait_queue_head_t tree_mod_seq_wait;
 
 	/* this protects tree_mod_log */
 	rwlock_t tree_mod_log_lock;
@@ -3192,7 +3191,7 @@
 int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
 			  struct bio *bio, u32 *dst);
 int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
-			      struct bio *bio, u64 logical_offset, u32 *dst);
+			      struct bio *bio, u64 logical_offset);
 int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root,
 			     u64 objectid, u64 pos,
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 335605c..07d5eeb 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -512,8 +512,8 @@
 
 	rb_erase(&delayed_item->rb_node, root);
 	delayed_item->delayed_node->count--;
-	atomic_dec(&delayed_root->items);
-	if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND &&
+	if (atomic_dec_return(&delayed_root->items) <
+	    BTRFS_DELAYED_BACKGROUND &&
 	    waitqueue_active(&delayed_root->wait))
 		wake_up(&delayed_root->wait);
 }
@@ -1028,9 +1028,10 @@
 		btrfs_release_delayed_item(prev);
 		ret = 0;
 		btrfs_release_path(path);
-		if (curr)
+		if (curr) {
+			mutex_unlock(&node->mutex);
 			goto do_again;
-		else
+		} else
 			goto delete_fail;
 	}
 
@@ -1055,8 +1056,7 @@
 		delayed_node->count--;
 
 		delayed_root = delayed_node->root->fs_info->delayed_root;
-		atomic_dec(&delayed_root->items);
-		if (atomic_read(&delayed_root->items) <
+		if (atomic_dec_return(&delayed_root->items) <
 		    BTRFS_DELAYED_BACKGROUND &&
 		    waitqueue_active(&delayed_root->wait))
 			wake_up(&delayed_root->wait);
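
Both hunks above replace "atomic_dec(); if (atomic_read() < threshold)" with atomic_dec_return(), so the value being compared is the one produced by this CPU's own decrement rather than whatever the counter holds a moment later. The same distinction with C11 atomics; the threshold and starting value are arbitrary:

#include <stdatomic.h>
#include <stdio.h>

#define BACKGROUND_THRESHOLD 4

static atomic_int items = 5;

static void release_item(void)
{
	/* fetch_sub returns the value *before* the subtraction, so the
	 * post-decrement value is old - 1: one atomic step, with no window
	 * for another thread to change the counter between dec and read. */
	int now = atomic_fetch_sub(&items, 1) - 1;

	if (now < BACKGROUND_THRESHOLD)
		printf("now %d: below threshold, wake the background worker\n", now);
}

int main(void)
{
	release_item();
	release_item();
	return 0;
}
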
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index da7419e..ae94117 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -38,17 +38,14 @@
 static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
 			  struct btrfs_delayed_tree_ref *ref1)
 {
-	if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
-		if (ref1->root < ref2->root)
-			return -1;
-		if (ref1->root > ref2->root)
-			return 1;
-	} else {
-		if (ref1->parent < ref2->parent)
-			return -1;
-		if (ref1->parent > ref2->parent)
-			return 1;
-	}
+	if (ref1->root < ref2->root)
+		return -1;
+	if (ref1->root > ref2->root)
+		return 1;
+	if (ref1->parent < ref2->parent)
+		return -1;
+	if (ref1->parent > ref2->parent)
+		return 1;
 	return 0;
 }
 
@@ -85,7 +82,8 @@
  * type of the delayed backrefs and content of delayed backrefs.
  */
 static int comp_entry(struct btrfs_delayed_ref_node *ref2,
-		      struct btrfs_delayed_ref_node *ref1)
+		      struct btrfs_delayed_ref_node *ref1,
+		      bool compare_seq)
 {
 	if (ref1->bytenr < ref2->bytenr)
 		return -1;
@@ -102,10 +100,12 @@
 	if (ref1->type > ref2->type)
 		return 1;
 	/* merging of sequenced refs is not allowed */
-	if (ref1->seq < ref2->seq)
-		return -1;
-	if (ref1->seq > ref2->seq)
-		return 1;
+	if (compare_seq) {
+		if (ref1->seq < ref2->seq)
+			return -1;
+		if (ref1->seq > ref2->seq)
+			return 1;
+	}
 	if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
 	    ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
 		return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
@@ -139,7 +139,7 @@
 		entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
 				 rb_node);
 
-		cmp = comp_entry(entry, ins);
+		cmp = comp_entry(entry, ins, 1);
 		if (cmp < 0)
 			p = &(*p)->rb_left;
 		else if (cmp > 0)
@@ -233,6 +233,114 @@
 	return 0;
 }
 
+static void inline drop_delayed_ref(struct btrfs_trans_handle *trans,
+				    struct btrfs_delayed_ref_root *delayed_refs,
+				    struct btrfs_delayed_ref_node *ref)
+{
+	rb_erase(&ref->rb_node, &delayed_refs->root);
+	ref->in_tree = 0;
+	btrfs_put_delayed_ref(ref);
+	delayed_refs->num_entries--;
+	if (trans->delayed_ref_updates)
+		trans->delayed_ref_updates--;
+}
+
+static int merge_ref(struct btrfs_trans_handle *trans,
+		     struct btrfs_delayed_ref_root *delayed_refs,
+		     struct btrfs_delayed_ref_node *ref, u64 seq)
+{
+	struct rb_node *node;
+	int merged = 0;
+	int mod = 0;
+	int done = 0;
+
+	node = rb_prev(&ref->rb_node);
+	while (node) {
+		struct btrfs_delayed_ref_node *next;
+
+		next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
+		node = rb_prev(node);
+		if (next->bytenr != ref->bytenr)
+			break;
+		if (seq && next->seq >= seq)
+			break;
+		if (comp_entry(ref, next, 0))
+			continue;
+
+		if (ref->action == next->action) {
+			mod = next->ref_mod;
+		} else {
+			if (ref->ref_mod < next->ref_mod) {
+				struct btrfs_delayed_ref_node *tmp;
+
+				tmp = ref;
+				ref = next;
+				next = tmp;
+				done = 1;
+			}
+			mod = -next->ref_mod;
+		}
+
+		merged++;
+		drop_delayed_ref(trans, delayed_refs, next);
+		ref->ref_mod += mod;
+		if (ref->ref_mod == 0) {
+			drop_delayed_ref(trans, delayed_refs, ref);
+			break;
+		} else {
+			/*
+			 * You can't have multiples of the same ref on a tree
+			 * block.
+			 */
+			WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
+				ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
+		}
+
+		if (done)
+			break;
+		node = rb_prev(&ref->rb_node);
+	}
+
+	return merged;
+}
+
+void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
+			      struct btrfs_fs_info *fs_info,
+			      struct btrfs_delayed_ref_root *delayed_refs,
+			      struct btrfs_delayed_ref_head *head)
+{
+	struct rb_node *node;
+	u64 seq = 0;
+
+	spin_lock(&fs_info->tree_mod_seq_lock);
+	if (!list_empty(&fs_info->tree_mod_seq_list)) {
+		struct seq_list *elem;
+
+		elem = list_first_entry(&fs_info->tree_mod_seq_list,
+					struct seq_list, list);
+		seq = elem->seq;
+	}
+	spin_unlock(&fs_info->tree_mod_seq_lock);
+
+	node = rb_prev(&head->node.rb_node);
+	while (node) {
+		struct btrfs_delayed_ref_node *ref;
+
+		ref = rb_entry(node, struct btrfs_delayed_ref_node,
+			       rb_node);
+		if (ref->bytenr != head->node.bytenr)
+			break;
+
+		/* We can't merge refs that are outside of our seq count */
+		if (seq && ref->seq >= seq)
+			break;
+		if (merge_ref(trans, delayed_refs, ref, seq))
+			node = rb_prev(&head->node.rb_node);
+		else
+			node = rb_prev(node);
+	}
+}
+
 int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
 			    struct btrfs_delayed_ref_root *delayed_refs,
 			    u64 seq)
@@ -336,18 +444,11 @@
 		 * every changing the extent allocation tree.
 		 */
 		existing->ref_mod--;
-		if (existing->ref_mod == 0) {
-			rb_erase(&existing->rb_node,
-				 &delayed_refs->root);
-			existing->in_tree = 0;
-			btrfs_put_delayed_ref(existing);
-			delayed_refs->num_entries--;
-			if (trans->delayed_ref_updates)
-				trans->delayed_ref_updates--;
-		} else {
+		if (existing->ref_mod == 0)
+			drop_delayed_ref(trans, delayed_refs, existing);
+		else
 			WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
 				existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
-		}
 	} else {
 		WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
 			existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
@@ -662,9 +763,6 @@
 	add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
 				   num_bytes, parent, ref_root, level, action,
 				   for_cow);
-	if (!need_ref_seq(for_cow, ref_root) &&
-	    waitqueue_active(&fs_info->tree_mod_seq_wait))
-		wake_up(&fs_info->tree_mod_seq_wait);
 	spin_unlock(&delayed_refs->lock);
 	if (need_ref_seq(for_cow, ref_root))
 		btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
@@ -713,9 +811,6 @@
 	add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
 				   num_bytes, parent, ref_root, owner, offset,
 				   action, for_cow);
-	if (!need_ref_seq(for_cow, ref_root) &&
-	    waitqueue_active(&fs_info->tree_mod_seq_wait))
-		wake_up(&fs_info->tree_mod_seq_wait);
 	spin_unlock(&delayed_refs->lock);
 	if (need_ref_seq(for_cow, ref_root))
 		btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
@@ -744,8 +839,6 @@
 				   num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
 				   extent_op->is_data);
 
-	if (waitqueue_active(&fs_info->tree_mod_seq_wait))
-		wake_up(&fs_info->tree_mod_seq_wait);
 	spin_unlock(&delayed_refs->lock);
 	return 0;
 }
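
btrfs_merge_delayed_refs()/merge_ref() above collapse matching add and drop entries queued against the same extent before the head is processed, which matters because relocation can drop an implicit ref and re-add it before the drop has run. Reduced to its arithmetic, each entry contributes ref_mod with the sign of its action and entries that sum to zero disappear; a heavily simplified sketch of that (the two-entry list and field names are inventions of this example, not the btrfs structures):

#include <stdio.h>

enum action { ADD = +1, DROP = -1 };

struct ref {
	enum action action;
	int mod;             /* how many references this entry adds or drops */
};

/* Net effect of a run of mergeable entries for one extent. */
static int merge(const struct ref *refs, int n)
{
	int net = 0;

	for (int i = 0; i < n; i++)
		net += refs[i].action * refs[i].mod;
	return net;          /* 0 means the entries cancel and can be dropped */
}

int main(void)
{
	struct ref refs[] = { { DROP, 1 }, { ADD, 1 } };   /* drop raced with re-add */
	int net = merge(refs, 2);

	if (net == 0)
		puts("entries cancel: nothing left to run for this extent");
	else
		printf("one surviving entry with ref_mod %d\n", net > 0 ? net : -net);
	return 0;
}
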
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 0d7c90c..ab53005 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -167,6 +167,10 @@
 				struct btrfs_trans_handle *trans,
 				u64 bytenr, u64 num_bytes,
 				struct btrfs_delayed_extent_op *extent_op);
+void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
+			      struct btrfs_fs_info *fs_info,
+			      struct btrfs_delayed_ref_root *delayed_refs,
+			      struct btrfs_delayed_ref_head *head);
 
 struct btrfs_delayed_ref_head *
 btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 62e0caf..22e98e0 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -377,9 +377,13 @@
 		ret = read_extent_buffer_pages(io_tree, eb, start,
 					       WAIT_COMPLETE,
 					       btree_get_extent, mirror_num);
-		if (!ret && !verify_parent_transid(io_tree, eb,
+		if (!ret) {
+			if (!verify_parent_transid(io_tree, eb,
 						   parent_transid, 0))
-			break;
+				break;
+			else
+				ret = -EIO;
+		}
 
 		/*
 		 * This buffer's crc is fine, but its contents are corrupted, so
@@ -754,9 +758,7 @@
 	limit = btrfs_async_submit_limit(fs_info);
 	limit = limit * 2 / 3;
 
-	atomic_dec(&fs_info->nr_async_submits);
-
-	if (atomic_read(&fs_info->nr_async_submits) < limit &&
+	if (atomic_dec_return(&fs_info->nr_async_submits) < limit &&
 	    waitqueue_active(&fs_info->async_submit_wait))
 		wake_up(&fs_info->async_submit_wait);
 
@@ -2032,8 +2034,6 @@
 	fs_info->free_chunk_space = 0;
 	fs_info->tree_mod_log = RB_ROOT;
 
-	init_waitqueue_head(&fs_info->tree_mod_seq_wait);
-
 	/* readahead state */
 	INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
 	spin_lock_init(&fs_info->reada_lock);
@@ -2528,8 +2528,7 @@
 		goto fail_trans_kthread;
 
 	/* do not make disk changes in broken FS */
-	if (btrfs_super_log_root(disk_super) != 0 &&
-	    !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) {
+	if (btrfs_super_log_root(disk_super) != 0) {
 		u64 bytenr = btrfs_super_log_root(disk_super);
 
 		if (fs_devices->rw_devices == 0) {
@@ -3189,30 +3188,14 @@
 	/* clear out the rbtree of defraggable inodes */
 	btrfs_run_defrag_inodes(fs_info);
 
-	/*
-	 * Here come 2 situations when btrfs is broken to flip readonly:
-	 *
-	 * 1. when btrfs flips readonly somewhere else before
-	 * btrfs_commit_super, sb->s_flags has MS_RDONLY flag,
-	 * and btrfs will skip to write sb directly to keep
-	 * ERROR state on disk.
-	 *
-	 * 2. when btrfs flips readonly just in btrfs_commit_super,
-	 * and in such case, btrfs cannot write sb via btrfs_commit_super,
-	 * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag,
-	 * btrfs will cleanup all FS resources first and write sb then.
-	 */
 	if (!(fs_info->sb->s_flags & MS_RDONLY)) {
 		ret = btrfs_commit_super(root);
 		if (ret)
 			printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
 	}
 
-	if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
-		ret = btrfs_error_commit_super(root);
-		if (ret)
-			printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
-	}
+	if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
+		btrfs_error_commit_super(root);
 
 	btrfs_put_block_group_cache(fs_info);
 
@@ -3434,18 +3417,11 @@
 	if (read_only)
 		return 0;
 
-	if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
-		printk(KERN_WARNING "warning: mount fs with errors, "
-		       "running btrfsck is recommended\n");
-	}
-
 	return 0;
 }
 
-int btrfs_error_commit_super(struct btrfs_root *root)
+void btrfs_error_commit_super(struct btrfs_root *root)
 {
-	int ret;
-
 	mutex_lock(&root->fs_info->cleaner_mutex);
 	btrfs_run_delayed_iputs(root);
 	mutex_unlock(&root->fs_info->cleaner_mutex);
@@ -3455,10 +3431,6 @@
 
 	/* cleanup FS via transaction */
 	btrfs_cleanup_transaction(root);
-
-	ret = write_ctree_super(NULL, root, 0);
-
-	return ret;
 }
 
 static void btrfs_destroy_ordered_operations(struct btrfs_root *root)
@@ -3782,14 +3754,17 @@
 		/* FIXME: cleanup wait for commit */
 		t->in_commit = 1;
 		t->blocked = 1;
+		smp_mb();
 		if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
 			wake_up(&root->fs_info->transaction_blocked_wait);
 
 		t->blocked = 0;
+		smp_mb();
 		if (waitqueue_active(&root->fs_info->transaction_wait))
 			wake_up(&root->fs_info->transaction_wait);
 
 		t->commit_done = 1;
+		smp_mb();
 		if (waitqueue_active(&t->commit_wait))
 			wake_up(&t->commit_wait);
 
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 95e147e..c5b00a7 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -54,7 +54,7 @@
 		      struct btrfs_root *root, int max_mirrors);
 struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
 int btrfs_commit_super(struct btrfs_root *root);
-int btrfs_error_commit_super(struct btrfs_root *root);
+void btrfs_error_commit_super(struct btrfs_root *root);
 struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
 					    u64 bytenr, u32 blocksize);
 struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4e1b153..ba58024 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2252,6 +2252,16 @@
 		}
 
 		/*
+		 * We need to try and merge add/drops of the same ref since we
+		 * can run into issues with relocate dropping the implicit ref
+		 * and then it being added back again before the drop can
+		 * finish.  If we merged anything we need to re-loop so we can
+		 * get a good ref.
+		 */
+		btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
+					 locked_ref);
+
+		/*
 		 * locked_ref is the head node, so we have to go one
 		 * node back for any delayed ref updates
 		 */
@@ -2318,12 +2328,23 @@
 		ref->in_tree = 0;
 		rb_erase(&ref->rb_node, &delayed_refs->root);
 		delayed_refs->num_entries--;
-		/*
-		 * we modified num_entries, but as we're currently running
-		 * delayed refs, skip
-		 *     wake_up(&delayed_refs->seq_wait);
-		 * here.
-		 */
+		if (locked_ref) {
+			/*
+			 * when we play the delayed ref, also correct the
+			 * ref_mod on head
+			 */
+			switch (ref->action) {
+			case BTRFS_ADD_DELAYED_REF:
+			case BTRFS_ADD_DELAYED_EXTENT:
+				locked_ref->node.ref_mod -= ref->ref_mod;
+				break;
+			case BTRFS_DROP_DELAYED_REF:
+				locked_ref->node.ref_mod += ref->ref_mod;
+				break;
+			default:
+				WARN_ON(1);
+			}
+		}
 		spin_unlock(&delayed_refs->lock);
 
 		ret = run_one_delayed_ref(trans, root, ref, extent_op,
@@ -2350,22 +2371,6 @@
 	return count;
 }
 
-static void wait_for_more_refs(struct btrfs_fs_info *fs_info,
-			       struct btrfs_delayed_ref_root *delayed_refs,
-			       unsigned long num_refs,
-			       struct list_head *first_seq)
-{
-	spin_unlock(&delayed_refs->lock);
-	pr_debug("waiting for more refs (num %ld, first %p)\n",
-		 num_refs, first_seq);
-	wait_event(fs_info->tree_mod_seq_wait,
-		   num_refs != delayed_refs->num_entries ||
-		   fs_info->tree_mod_seq_list.next != first_seq);
-	pr_debug("done waiting for more refs (num %ld, first %p)\n",
-		 delayed_refs->num_entries, fs_info->tree_mod_seq_list.next);
-	spin_lock(&delayed_refs->lock);
-}
-
 #ifdef SCRAMBLE_DELAYED_REFS
 /*
  * Normally delayed refs get processed in ascending bytenr order. This
@@ -2460,13 +2465,11 @@
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct btrfs_delayed_ref_node *ref;
 	struct list_head cluster;
-	struct list_head *first_seq = NULL;
 	int ret;
 	u64 delayed_start;
 	int run_all = count == (unsigned long)-1;
 	int run_most = 0;
-	unsigned long num_refs = 0;
-	int consider_waiting;
+	int loops;
 
 	/* We'll clean this up in btrfs_cleanup_transaction */
 	if (trans->aborted)
@@ -2484,7 +2487,7 @@
 	delayed_refs = &trans->transaction->delayed_refs;
 	INIT_LIST_HEAD(&cluster);
 again:
-	consider_waiting = 0;
+	loops = 0;
 	spin_lock(&delayed_refs->lock);
 
 #ifdef SCRAMBLE_DELAYED_REFS
@@ -2512,31 +2515,6 @@
 		if (ret)
 			break;
 
-		if (delayed_start >= delayed_refs->run_delayed_start) {
-			if (consider_waiting == 0) {
-				/*
-				 * btrfs_find_ref_cluster looped. let's do one
-				 * more cycle. if we don't run any delayed ref
-				 * during that cycle (because we can't because
-				 * all of them are blocked) and if the number of
-				 * refs doesn't change, we avoid busy waiting.
-				 */
-				consider_waiting = 1;
-				num_refs = delayed_refs->num_entries;
-				first_seq = root->fs_info->tree_mod_seq_list.next;
-			} else {
-				wait_for_more_refs(root->fs_info, delayed_refs,
-						   num_refs, first_seq);
-				/*
-				 * after waiting, things have changed. we
-				 * dropped the lock and someone else might have
-				 * run some refs, built new clusters and so on.
-				 * therefore, we restart staleness detection.
-				 */
-				consider_waiting = 0;
-			}
-		}
-
 		ret = run_clustered_refs(trans, root, &cluster);
 		if (ret < 0) {
 			spin_unlock(&delayed_refs->lock);
@@ -2549,9 +2527,26 @@
 		if (count == 0)
 			break;
 
-		if (ret || delayed_refs->run_delayed_start == 0) {
+		if (delayed_start >= delayed_refs->run_delayed_start) {
+			if (loops == 0) {
+				/*
+				 * btrfs_find_ref_cluster looped. let's do one
+				 * more cycle. if we don't run any delayed ref
+				 * during that cycle (because we can't because
+				 * all of them are blocked), bail out.
+				 */
+				loops = 1;
+			} else {
+				/*
+				 * no runnable refs left, stop trying
+				 */
+				BUG_ON(run_all);
+				break;
+			}
+		}
+		if (ret) {
 			/* refs were run, let's reset staleness detection */
-			consider_waiting = 0;
+			loops = 0;
 		}
 	}
 
@@ -3007,17 +3002,16 @@
 	}
 	spin_unlock(&block_group->lock);
 
-	num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024);
+	/*
+	 * Try to preallocate enough space based on how big the block group is.
+	 * Keep in mind this has to include any pinned space which could end up
+	 * taking up quite a bit since it's not folded into the other space
+	 * cache.
+	 */
+	num_pages = (int)div64_u64(block_group->key.offset, 256 * 1024 * 1024);
 	if (!num_pages)
 		num_pages = 1;
 
-	/*
-	 * Just to make absolutely sure we have enough space, we're going to
-	 * preallocate 12 pages worth of space for each block group.  In
-	 * practice we ought to use at most 8, but we need extra space so we can
-	 * add our header and have a terminator between the extents and the
-	 * bitmaps.
-	 */
 	num_pages *= 16;
 	num_pages *= PAGE_CACHE_SIZE;
 
@@ -4571,8 +4565,10 @@
 	if (root->fs_info->quota_enabled) {
 		ret = btrfs_qgroup_reserve(root, num_bytes +
 					   nr_extents * root->leafsize);
-		if (ret)
+		if (ret) {
+			mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
 			return ret;
+		}
 	}
 
 	ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
@@ -5294,9 +5290,6 @@
 	rb_erase(&head->node.rb_node, &delayed_refs->root);
 
 	delayed_refs->num_entries--;
-	smp_mb();
-	if (waitqueue_active(&root->fs_info->tree_mod_seq_wait))
-		wake_up(&root->fs_info->tree_mod_seq_wait);
 
 	/*
 	 * we don't take a ref on the node because we're removing it from the
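
The cache_save_setup() hunk above changes the free-space-cache preallocation from one page per GiB of block group to one per 256 MiB, still multiplied by 16 and by the page size. For a 1 GiB block group with 4 KiB pages that comes to 4 * 16 = 64 pages, i.e. 256 KiB; a few lines reproducing the arithmetic (the sizes are example inputs and PAGE_CACHE_SIZE is assumed to be 4 KiB):

#include <stdio.h>

int main(void)
{
	unsigned long long block_group = 1ULL << 30;   /* 1 GiB block group */
	unsigned long long page_size   = 4096;         /* assumed PAGE_CACHE_SIZE */

	unsigned long long num_pages = block_group / (256ULL * 1024 * 1024);
	if (!num_pages)
		num_pages = 1;
	num_pages *= 16;

	printf("preallocate %llu pages = %llu KiB for the space cache\n",
	       num_pages, num_pages * page_size / 1024);
	return 0;
}
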
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 45c81bb..4c87847 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2330,23 +2330,10 @@
 		if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
 			ret = tree->ops->readpage_end_io_hook(page, start, end,
 							      state, mirror);
-			if (ret) {
-				/* no IO indicated but software detected errors
-				 * in the block, either checksum errors or
-				 * issues with the contents */
-				struct btrfs_root *root =
-					BTRFS_I(page->mapping->host)->root;
-				struct btrfs_device *device;
-
+			if (ret)
 				uptodate = 0;
-				device = btrfs_find_device_for_logical(
-						root, start, mirror);
-				if (device)
-					btrfs_dev_stat_inc_and_print(device,
-						BTRFS_DEV_STAT_CORRUPTION_ERRS);
-			} else {
+			else
 				clean_io_failure(start, page);
-			}
 		}
 
 		if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) {
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index b45b9de..857d93c 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -272,9 +272,9 @@
 }
 
 int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
-			      struct bio *bio, u64 offset, u32 *dst)
+			      struct bio *bio, u64 offset)
 {
-	return __btrfs_lookup_bio_sums(root, inode, bio, offset, dst, 1);
+	return __btrfs_lookup_bio_sums(root, inode, bio, offset, NULL, 1);
 }
 
 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 6e8f416..ec154f9 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1008,9 +1008,7 @@
 	nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >>
 		PAGE_CACHE_SHIFT;
 
-	atomic_sub(nr_pages, &root->fs_info->async_delalloc_pages);
-
-	if (atomic_read(&root->fs_info->async_delalloc_pages) <
+	if (atomic_sub_return(nr_pages, &root->fs_info->async_delalloc_pages) <
 	    5 * 1024 * 1024 &&
 	    waitqueue_active(&root->fs_info->async_submit_wait))
 		wake_up(&root->fs_info->async_submit_wait);
@@ -1885,8 +1883,11 @@
 				trans = btrfs_join_transaction_nolock(root);
 			else
 				trans = btrfs_join_transaction(root);
-			if (IS_ERR(trans))
-				return PTR_ERR(trans);
+			if (IS_ERR(trans)) {
+				ret = PTR_ERR(trans);
+				trans = NULL;
+				goto out;
+			}
 			trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 			ret = btrfs_update_inode_fallback(trans, root, inode);
 			if (ret) /* -ENOMEM or corruption */
@@ -3174,7 +3175,7 @@
 	btrfs_i_size_write(dir, dir->i_size - name_len * 2);
 	inode_inc_iversion(dir);
 	dir->i_mtime = dir->i_ctime = CURRENT_TIME;
-	ret = btrfs_update_inode(trans, root, dir);
+	ret = btrfs_update_inode_fallback(trans, root, dir);
 	if (ret)
 		btrfs_abort_transaction(trans, root, ret);
 out:
@@ -5774,18 +5775,112 @@
 	return ret;
 }
 
+static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
+			      struct extent_state **cached_state, int writing)
+{
+	struct btrfs_ordered_extent *ordered;
+	int ret = 0;
+
+	while (1) {
+		lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+				 0, cached_state);
+		/*
+		 * We're concerned with the entire range that we're going to be
+		 * doing DIO to, so we need to make sure there's no ordered
+		 * extents in this range.
+		 */
+		ordered = btrfs_lookup_ordered_range(inode, lockstart,
+						     lockend - lockstart + 1);
+
+		/*
+		 * We need to make sure there are no buffered pages in this
+		 * range either, we could have raced between the invalidate in
+		 * generic_file_direct_write and locking the extent.  The
+		 * invalidate needs to happen so that reads after a write do not
+		 * get stale data.
+		 */
+		if (!ordered && (!writing ||
+		    !test_range_bit(&BTRFS_I(inode)->io_tree,
+				    lockstart, lockend, EXTENT_UPTODATE, 0,
+				    *cached_state)))
+			break;
+
+		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+				     cached_state, GFP_NOFS);
+
+		if (ordered) {
+			btrfs_start_ordered_extent(inode, ordered, 1);
+			btrfs_put_ordered_extent(ordered);
+		} else {
+			/* Screw you mmap */
+			ret = filemap_write_and_wait_range(inode->i_mapping,
+							   lockstart,
+							   lockend);
+			if (ret)
+				break;
+
+			/*
+			 * If we found a page that couldn't be invalidated just
+			 * fall back to buffered.
+			 */
+			ret = invalidate_inode_pages2_range(inode->i_mapping,
+					lockstart >> PAGE_CACHE_SHIFT,
+					lockend >> PAGE_CACHE_SHIFT);
+			if (ret)
+				break;
+		}
+
+		cond_resched();
+	}
+
+	return ret;
+}
+
 static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
 				   struct buffer_head *bh_result, int create)
 {
 	struct extent_map *em;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct extent_state *cached_state = NULL;
 	u64 start = iblock << inode->i_blkbits;
+	u64 lockstart, lockend;
 	u64 len = bh_result->b_size;
 	struct btrfs_trans_handle *trans;
+	int unlock_bits = EXTENT_LOCKED;
+	int ret;
+
+	if (create) {
+		ret = btrfs_delalloc_reserve_space(inode, len);
+		if (ret)
+			return ret;
+		unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY;
+	} else {
+		len = min_t(u64, len, root->sectorsize);
+	}
+
+	lockstart = start;
+	lockend = start + len - 1;
+
+	/*
+	 * If this errors out it's because we couldn't invalidate pagecache for
+	 * this range and we need to fallback to buffered.
+	 */
+	if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create))
+		return -ENOTBLK;
+
+	if (create) {
+		ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+				     lockend, EXTENT_DELALLOC, NULL,
+				     &cached_state, GFP_NOFS);
+		if (ret)
+			goto unlock_err;
+	}
 
 	em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
-	if (IS_ERR(em))
-		return PTR_ERR(em);
+	if (IS_ERR(em)) {
+		ret = PTR_ERR(em);
+		goto unlock_err;
+	}
 
 	/*
 	 * Ok for INLINE and COMPRESSED extents we need to fallback on buffered
@@ -5804,17 +5899,16 @@
 	if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
 	    em->block_start == EXTENT_MAP_INLINE) {
 		free_extent_map(em);
-		return -ENOTBLK;
+		ret = -ENOTBLK;
+		goto unlock_err;
 	}
 
 	/* Just a good old fashioned hole, return */
 	if (!create && (em->block_start == EXTENT_MAP_HOLE ||
 			test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
 		free_extent_map(em);
-		/* DIO will do one hole at a time, so just unlock a sector */
-		unlock_extent(&BTRFS_I(inode)->io_tree, start,
-			      start + root->sectorsize - 1);
-		return 0;
+		ret = 0;
+		goto unlock_err;
 	}
 
 	/*
@@ -5827,8 +5921,9 @@
 	 *
 	 */
 	if (!create) {
-		len = em->len - (start - em->start);
-		goto map;
+		len = min(len, em->len - (start - em->start));
+		lockstart = start + len;
+		goto unlock;
 	}
 
 	if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
@@ -5860,7 +5955,7 @@
 			btrfs_end_transaction(trans, root);
 			if (ret) {
 				free_extent_map(em);
-				return ret;
+				goto unlock_err;
 			}
 			goto unlock;
 		}
@@ -5873,14 +5968,12 @@
 	 */
 	len = bh_result->b_size;
 	em = btrfs_new_extent_direct(inode, em, start, len);
-	if (IS_ERR(em))
-		return PTR_ERR(em);
+	if (IS_ERR(em)) {
+		ret = PTR_ERR(em);
+		goto unlock_err;
+	}
 	len = min(len, em->len - (start - em->start));
 unlock:
-	clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1,
-			  EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1,
-			  0, NULL, GFP_NOFS);
-map:
 	bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
 		inode->i_blkbits;
 	bh_result->b_size = len;
@@ -5898,9 +5991,44 @@
 			i_size_write(inode, start + len);
 	}
 
+	/*
+	 * In the case of write we need to clear and unlock the entire range,
+	 * in the case of read we need to unlock only the end area that we
+	 * aren't using if there is any left over space.
+	 */
+	if (lockstart < lockend) {
+		if (create && len < lockend - lockstart) {
+			clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+					 lockstart + len - 1, unlock_bits, 1, 0,
+					 &cached_state, GFP_NOFS);
+			/*
+			 * Beside unlock, we also need to cleanup reserved space
+			 * for the left range by attaching EXTENT_DO_ACCOUNTING.
+			 */
+			clear_extent_bit(&BTRFS_I(inode)->io_tree,
+					 lockstart + len, lockend,
+					 unlock_bits | EXTENT_DO_ACCOUNTING,
+					 1, 0, NULL, GFP_NOFS);
+		} else {
+			clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+					 lockend, unlock_bits, 1, 0,
+					 &cached_state, GFP_NOFS);
+		}
+	} else {
+		free_extent_state(cached_state);
+	}
+
 	free_extent_map(em);
 
 	return 0;
+
+unlock_err:
+	if (create)
+		unlock_bits |= EXTENT_DO_ACCOUNTING;
+
+	clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+			 unlock_bits, 1, 0, &cached_state, GFP_NOFS);
+	return ret;
 }
 
 struct btrfs_dio_private {
@@ -5908,7 +6036,6 @@
 	u64 logical_offset;
 	u64 disk_bytenr;
 	u64 bytes;
-	u32 *csums;
 	void *private;
 
 	/* number of bios pending for this dio */
@@ -5928,7 +6055,6 @@
 	struct inode *inode = dip->inode;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	u64 start;
-	u32 *private = dip->csums;
 
 	start = dip->logical_offset;
 	do {
@@ -5936,8 +6062,12 @@
 			struct page *page = bvec->bv_page;
 			char *kaddr;
 			u32 csum = ~(u32)0;
+			u64 private = ~(u32)0;
 			unsigned long flags;
 
+			if (get_state_private(&BTRFS_I(inode)->io_tree,
+					      start, &private))
+				goto failed;
 			local_irq_save(flags);
 			kaddr = kmap_atomic(page);
 			csum = btrfs_csum_data(root, kaddr + bvec->bv_offset,
@@ -5947,18 +6077,18 @@
 			local_irq_restore(flags);
 
 			flush_dcache_page(bvec->bv_page);
-			if (csum != *private) {
+			if (csum != private) {
+failed:
 				printk(KERN_ERR "btrfs csum failed ino %llu off"
 				      " %llu csum %u private %u\n",
 				      (unsigned long long)btrfs_ino(inode),
 				      (unsigned long long)start,
-				      csum, *private);
+				      csum, (unsigned)private);
 				err = -EIO;
 			}
 		}
 
 		start += bvec->bv_len;
-		private++;
 		bvec++;
 	} while (bvec <= bvec_end);
 
@@ -5966,7 +6096,6 @@
 		      dip->logical_offset + dip->bytes - 1);
 	bio->bi_private = dip->private;
 
-	kfree(dip->csums);
 	kfree(dip);
 
 	/* If we had a csum failure make sure to clear the uptodate flag */
@@ -6072,7 +6201,7 @@
 
 static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
 					 int rw, u64 file_offset, int skip_sum,
-					 u32 *csums, int async_submit)
+					 int async_submit)
 {
 	int write = rw & REQ_WRITE;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -6105,8 +6234,7 @@
 		if (ret)
 			goto err;
 	} else if (!skip_sum) {
-		ret = btrfs_lookup_bio_sums_dio(root, inode, bio,
-					  file_offset, csums);
+		ret = btrfs_lookup_bio_sums_dio(root, inode, bio, file_offset);
 		if (ret)
 			goto err;
 	}
@@ -6132,10 +6260,8 @@
 	u64 submit_len = 0;
 	u64 map_length;
 	int nr_pages = 0;
-	u32 *csums = dip->csums;
 	int ret = 0;
 	int async_submit = 0;
-	int write = rw & REQ_WRITE;
 
 	map_length = orig_bio->bi_size;
 	ret = btrfs_map_block(map_tree, READ, start_sector << 9,
@@ -6171,16 +6297,13 @@
 			atomic_inc(&dip->pending_bios);
 			ret = __btrfs_submit_dio_bio(bio, inode, rw,
 						     file_offset, skip_sum,
-						     csums, async_submit);
+						     async_submit);
 			if (ret) {
 				bio_put(bio);
 				atomic_dec(&dip->pending_bios);
 				goto out_err;
 			}
 
-			/* Write's use the ordered csums */
-			if (!write && !skip_sum)
-				csums = csums + nr_pages;
 			start_sector += submit_len >> 9;
 			file_offset += submit_len;
 
@@ -6210,7 +6333,7 @@
 
 submit:
 	ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
-				     csums, async_submit);
+				     async_submit);
 	if (!ret)
 		return 0;
 
@@ -6246,17 +6369,6 @@
 		ret = -ENOMEM;
 		goto free_ordered;
 	}
-	dip->csums = NULL;
-
-	/* Write's use the ordered csum stuff, so we don't need dip->csums */
-	if (!write && !skip_sum) {
-		dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
-		if (!dip->csums) {
-			kfree(dip);
-			ret = -ENOMEM;
-			goto free_ordered;
-		}
-	}
 
 	dip->private = bio->bi_private;
 	dip->inode = inode;
@@ -6341,132 +6453,22 @@
 out:
 	return retval;
 }
+
 static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
 			const struct iovec *iov, loff_t offset,
 			unsigned long nr_segs)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
-	struct btrfs_ordered_extent *ordered;
-	struct extent_state *cached_state = NULL;
-	u64 lockstart, lockend;
-	ssize_t ret;
-	int writing = rw & WRITE;
-	int write_bits = 0;
-	size_t count = iov_length(iov, nr_segs);
 
 	if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
-			    offset, nr_segs)) {
+			    offset, nr_segs))
 		return 0;
-	}
 
-	lockstart = offset;
-	lockend = offset + count - 1;
-
-	if (writing) {
-		ret = btrfs_delalloc_reserve_space(inode, count);
-		if (ret)
-			goto out;
-	}
-
-	while (1) {
-		lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-				 0, &cached_state);
-		/*
-		 * We're concerned with the entire range that we're going to be
-		 * doing DIO to, so we need to make sure theres no ordered
-		 * extents in this range.
-		 */
-		ordered = btrfs_lookup_ordered_range(inode, lockstart,
-						     lockend - lockstart + 1);
-
-		/*
-		 * We need to make sure there are no buffered pages in this
-		 * range either, we could have raced between the invalidate in
-		 * generic_file_direct_write and locking the extent.  The
-		 * invalidate needs to happen so that reads after a write do not
-		 * get stale data.
-		 */
-		if (!ordered && (!writing ||
-		    !test_range_bit(&BTRFS_I(inode)->io_tree,
-				    lockstart, lockend, EXTENT_UPTODATE, 0,
-				    cached_state)))
-			break;
-
-		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-				     &cached_state, GFP_NOFS);
-
-		if (ordered) {
-			btrfs_start_ordered_extent(inode, ordered, 1);
-			btrfs_put_ordered_extent(ordered);
-		} else {
-			/* Screw you mmap */
-			ret = filemap_write_and_wait_range(file->f_mapping,
-							   lockstart,
-							   lockend);
-			if (ret)
-				goto out;
-
-			/*
-			 * If we found a page that couldn't be invalidated just
-			 * fall back to buffered.
-			 */
-			ret = invalidate_inode_pages2_range(file->f_mapping,
-					lockstart >> PAGE_CACHE_SHIFT,
-					lockend >> PAGE_CACHE_SHIFT);
-			if (ret) {
-				if (ret == -EBUSY)
-					ret = 0;
-				goto out;
-			}
-		}
-
-		cond_resched();
-	}
-
-	/*
-	 * we don't use btrfs_set_extent_delalloc because we don't want
-	 * the dirty or uptodate bits
-	 */
-	if (writing) {
-		write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING;
-		ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-				     EXTENT_DELALLOC, NULL, &cached_state,
-				     GFP_NOFS);
-		if (ret) {
-			clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
-					 lockend, EXTENT_LOCKED | write_bits,
-					 1, 0, &cached_state, GFP_NOFS);
-			goto out;
-		}
-	}
-
-	free_extent_state(cached_state);
-	cached_state = NULL;
-
-	ret = __blockdev_direct_IO(rw, iocb, inode,
+	return __blockdev_direct_IO(rw, iocb, inode,
 		   BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
 		   iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
 		   btrfs_submit_direct, 0);
-
-	if (ret < 0 && ret != -EIOCBQUEUED) {
-		clear_extent_bit(&BTRFS_I(inode)->io_tree, offset,
-			      offset + iov_length(iov, nr_segs) - 1,
-			      EXTENT_LOCKED | write_bits, 1, 0,
-			      &cached_state, GFP_NOFS);
-	} else if (ret >= 0 && ret < iov_length(iov, nr_segs)) {
-		/*
-		 * We're falling back to buffered, unlock the section we didn't
-		 * do IO on.
-		 */
-		clear_extent_bit(&BTRFS_I(inode)->io_tree, offset + ret,
-			      offset + iov_length(iov, nr_segs) - 1,
-			      EXTENT_LOCKED | write_bits, 1, 0,
-			      &cached_state, GFP_NOFS);
-	}
-out:
-	free_extent_state(cached_state);
-	return ret;
 }
 
 static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
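
lock_extent_direct(), factored out above, is a lock-check-back-off loop: take the extent lock, look for conflicting ordered extents or stale pages, and if any are found drop the lock, resolve the conflict and try again. The shape of that loop in a generic single-threaded sketch; the conflict and resolve helpers are stand-ins, not btrfs calls:

#include <stdbool.h>
#include <stdio.h>

static int pending_conflicts = 2;      /* pretend two ordered extents exist */

static void lock_range(void)      { puts("lock range"); }
static void unlock_range(void)    { puts("unlock range"); }
static bool has_conflict(void)    { return pending_conflicts > 0; }
static void resolve_conflict(void) { pending_conflicts--; puts("wait for conflict"); }

static int lock_range_for_dio(void)
{
	for (;;) {
		lock_range();
		if (!has_conflict())
			return 0;          /* locked with nothing in the way */
		unlock_range();            /* never wait while holding the lock */
		resolve_conflict();
	}
}

int main(void)
{
	lock_range_for_dio();
	puts("range locked, DIO can proceed");
	return 0;
}
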
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 7bb7556..9df50fa 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -424,7 +424,7 @@
 	uuid_le_gen(&new_uuid);
 	memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE);
 	root_item.otime.sec = cpu_to_le64(cur_time.tv_sec);
-	root_item.otime.nsec = cpu_to_le64(cur_time.tv_nsec);
+	root_item.otime.nsec = cpu_to_le32(cur_time.tv_nsec);
 	root_item.ctime = root_item.otime;
 	btrfs_set_root_ctransid(&root_item, trans->transid);
 	btrfs_set_root_otransid(&root_item, trans->transid);
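
The cpu_to_le32() fixes here (and the matching ones in root-tree.c and transaction.c) matter because the nsec members are 32-bit on-disk fields: on a big-endian machine cpu_to_le64(nsec) byte-swaps the value into the upper half of a 64-bit word, so truncating the result to 32 bits stores zero instead of the nanoseconds. A small demonstration that simulates the big-endian case with an explicit byte swap (__builtin_bswap* is a GCC/Clang builtin) in place of the kernel helpers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t nsec = 123456789;            /* always < 10^9, fits in 32 bits */

	/* What a big-endian CPU would store with the wrong-width helper:
	 * swap as 64-bit, then truncate into the 32-bit field.           */
	uint32_t wrong = (uint32_t)__builtin_bswap64(nsec);

	/* Correct: swap at the width of the destination field. */
	uint32_t right = __builtin_bswap32((uint32_t)nsec);

	printf("cpu_to_le64 then truncate: 0x%08x\n", wrong);
	printf("cpu_to_le32:               0x%08x\n", right);
	return 0;
}
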
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index a44eff074..2a1762c 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -67,7 +67,7 @@
 {
 	if (eb->lock_nested) {
 		read_lock(&eb->lock);
-		if (&eb->lock_nested && current->pid == eb->lock_owner) {
+		if (eb->lock_nested && current->pid == eb->lock_owner) {
 			read_unlock(&eb->lock);
 			return;
 		}
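
The locking.c change drops a stray "&": testing &eb->lock_nested checks the address of the field, which is never NULL, so the branch used to depend only on the pid comparison regardless of lock_nested's value. Compilers flag this with -Waddress; a two-line reproduction:

#include <stdio.h>

struct extent_buffer { int lock_nested; };

int main(void)
{
	struct extent_buffer eb = { .lock_nested = 0 };

	if (&eb.lock_nested)    /* address of a field: always non-NULL, always true */
		puts("taken even though lock_nested == 0");
	if (eb.lock_nested)
		puts("not printed: the field itself is 0");
	return 0;
}
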
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index bc424ae..38b42e7 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1364,13 +1364,17 @@
 	spin_lock(&fs_info->qgroup_lock);
 
 	dstgroup = add_qgroup_rb(fs_info, objectid);
-	if (!dstgroup)
+	if (IS_ERR(dstgroup)) {
+		ret = PTR_ERR(dstgroup);
 		goto unlock;
+	}
 
 	if (srcid) {
 		srcgroup = find_qgroup_rb(fs_info, srcid);
-		if (!srcgroup)
+		if (!srcgroup) {
+			ret = -EINVAL;
 			goto unlock;
+		}
 		dstgroup->rfer = srcgroup->rfer - level_size;
 		dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size;
 		srcgroup->excl = level_size;
@@ -1379,8 +1383,10 @@
 		qgroup_dirty(fs_info, srcgroup);
 	}
 
-	if (!inherit)
+	if (!inherit) {
+		ret = -EINVAL;
 		goto unlock;
+	}
 
 	i_qgroups = (u64 *)(inherit + 1);
 	for (i = 0; i < inherit->num_qgroups; ++i) {
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 6bb465c..10d8e4d 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -544,8 +544,8 @@
 	struct timespec ct = CURRENT_TIME;
 
 	spin_lock(&root->root_times_lock);
-	item->ctransid = trans->transid;
+	item->ctransid = cpu_to_le64(trans->transid);
 	item->ctime.sec = cpu_to_le64(ct.tv_sec);
-	item->ctime.nsec = cpu_to_le64(ct.tv_nsec);
+	item->ctime.nsec = cpu_to_le32(ct.tv_nsec);
 	spin_unlock(&root->root_times_lock);
 }
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index f2eb24c..83d6f9f 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -838,7 +838,6 @@
 	struct btrfs_trans_handle *trans;
 	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
 	struct btrfs_root *root = fs_info->tree_root;
-	int ret;
 
 	trace_btrfs_sync_fs(wait);
 
@@ -849,11 +848,17 @@
 
 	btrfs_wait_ordered_extents(root, 0, 0);
 
-	trans = btrfs_start_transaction(root, 0);
+	spin_lock(&fs_info->trans_lock);
+	if (!fs_info->running_transaction) {
+		spin_unlock(&fs_info->trans_lock);
+		return 0;
+	}
+	spin_unlock(&fs_info->trans_lock);
+
+	trans = btrfs_join_transaction(root);
 	if (IS_ERR(trans))
 		return PTR_ERR(trans);
-	ret = btrfs_commit_transaction(trans, root);
-	return ret;
+	return btrfs_commit_transaction(trans, root);
 }
 
 static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
@@ -1530,6 +1535,8 @@
 	while (cur_devices) {
 		head = &cur_devices->devices;
 		list_for_each_entry(dev, head, dev_list) {
+			if (dev->missing)
+				continue;
 			if (!first_dev || dev->devid < first_dev->devid)
 				first_dev = dev;
 		}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 17be3de..27c2600 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1031,6 +1031,7 @@
 
 	btrfs_i_size_write(parent_inode, parent_inode->i_size +
 					 dentry->d_name.len * 2);
+	parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
 	ret = btrfs_update_inode(trans, parent_root, parent_inode);
 	if (ret)
 		goto abort_trans_dput;
@@ -1066,7 +1067,7 @@
 	memcpy(new_root_item->parent_uuid, root->root_item.uuid,
 			BTRFS_UUID_SIZE);
 	new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec);
-	new_root_item->otime.nsec = cpu_to_le64(cur_time.tv_nsec);
+	new_root_item->otime.nsec = cpu_to_le32(cur_time.tv_nsec);
 	btrfs_set_root_otransid(new_root_item, trans->transid);
 	memset(&new_root_item->stime, 0, sizeof(new_root_item->stime));
 	memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime));
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e86ae04..88b969a 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -227,9 +227,8 @@
 		cur = pending;
 		pending = pending->bi_next;
 		cur->bi_next = NULL;
-		atomic_dec(&fs_info->nr_async_bios);
 
-		if (atomic_read(&fs_info->nr_async_bios) < limit &&
+		if (atomic_dec_return(&fs_info->nr_async_bios) < limit &&
 		    waitqueue_active(&fs_info->async_submit_wait))
 			wake_up(&fs_info->async_submit_wait);
 
@@ -569,9 +568,11 @@
 		memcpy(new_device, device, sizeof(*new_device));
 
 		/* Safe because we are under uuid_mutex */
-		name = rcu_string_strdup(device->name->str, GFP_NOFS);
-		BUG_ON(device->name && !name); /* -ENOMEM */
-		rcu_assign_pointer(new_device->name, name);
+		if (device->name) {
+			name = rcu_string_strdup(device->name->str, GFP_NOFS);
+			BUG_ON(device->name && !name); /* -ENOMEM */
+			rcu_assign_pointer(new_device->name, name);
+		}
 		new_device->bdev = NULL;
 		new_device->writeable = 0;
 		new_device->in_fs_metadata = 0;
@@ -4605,28 +4606,6 @@
 	return ret;
 }
 
-struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root,
-						   u64 logical, int mirror_num)
-{
-	struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
-	int ret;
-	u64 map_length = 0;
-	struct btrfs_bio *bbio = NULL;
-	struct btrfs_device *device;
-
-	BUG_ON(mirror_num == 0);
-	ret = btrfs_map_block(map_tree, WRITE, logical, &map_length, &bbio,
-			      mirror_num);
-	if (ret) {
-		BUG_ON(bbio != NULL);
-		return NULL;
-	}
-	BUG_ON(mirror_num != bbio->mirror_num);
-	device = bbio->stripes[mirror_num - 1].dev;
-	kfree(bbio);
-	return device;
-}
-
 int btrfs_read_chunk_tree(struct btrfs_root *root)
 {
 	struct btrfs_path *path;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 5479325..53c06af 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -289,8 +289,6 @@
 int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
 int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
 			 u64 *start, u64 *max_avail);
-struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root,
-						   u64 logical, int mirror_num);
 void btrfs_dev_stat_print_on_error(struct btrfs_device *device);
 void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
 int btrfs_get_dev_stats(struct btrfs_root *root,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 074923c..f0cf934 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1576,9 +1576,14 @@
 		/* result already set, check signature */
 		if (server->sec_mode &
 		    (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
-			if (cifs_verify_signature(rdata->iov, rdata->nr_iov,
-					  server, mid->sequence_number + 1))
-				cERROR(1, "Unexpected SMB signature");
+			int rc = 0;
+
+			rc = cifs_verify_signature(rdata->iov, rdata->nr_iov,
+						   server,
+						   mid->sequence_number + 1);
+			if (rc)
+				cERROR(1, "SMB signature verification returned "
+				       "error = %d", rc);
 		}
 		/* FIXME: should this be counted toward the initiating task? */
 		task_io_account_read(rdata->bytes);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index cbe709a..781025b 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -356,19 +356,12 @@
 cifs_create_set_dentry:
 	if (rc != 0) {
 		cFYI(1, "Create worked, get_inode_info failed rc = %d", rc);
+		CIFSSMBClose(xid, tcon, *fileHandle);
 		goto out;
 	}
 	d_drop(direntry);
 	d_add(direntry, newinode);
 
-	/* ENOENT for create?  How weird... */
-	rc = -ENOENT;
-	if (!newinode) {
-		CIFSSMBClose(xid, tcon, *fileHandle);
-		goto out;
-	}
-	rc = 0;
-
 out:
 	kfree(buf);
 	kfree(full_path);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 7354877..cb79c7e 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -124,10 +124,10 @@
 {
 	struct cifsInodeInfo *cifs_i = CIFS_I(inode);
 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
-	unsigned long oldtime = cifs_i->time;
 
 	cifs_revalidate_cache(inode, fattr);
 
+	spin_lock(&inode->i_lock);
 	inode->i_atime = fattr->cf_atime;
 	inode->i_mtime = fattr->cf_mtime;
 	inode->i_ctime = fattr->cf_ctime;
@@ -148,9 +148,6 @@
 	else
 		cifs_i->time = jiffies;
 
-	cFYI(1, "inode 0x%p old_time=%ld new_time=%ld", inode,
-		 oldtime, cifs_i->time);
-
 	cifs_i->delete_pending = fattr->cf_flags & CIFS_FATTR_DELETE_PENDING;
 
 	cifs_i->server_eof = fattr->cf_eof;
@@ -158,7 +155,6 @@
 	 * Can't safely change the file size here if the client is writing to
 	 * it due to potential races.
 	 */
-	spin_lock(&inode->i_lock);
 	if (is_size_safe_to_change(cifs_i, fattr->cf_eof)) {
 		i_size_write(inode, fattr->cf_eof);
 
@@ -859,12 +855,14 @@
 
 	if (rc && tcon->ipc) {
 		cFYI(1, "ipc connection - fake read inode");
+		spin_lock(&inode->i_lock);
 		inode->i_mode |= S_IFDIR;
 		set_nlink(inode, 2);
 		inode->i_op = &cifs_ipc_inode_ops;
 		inode->i_fop = &simple_dir_operations;
 		inode->i_uid = cifs_sb->mnt_uid;
 		inode->i_gid = cifs_sb->mnt_gid;
+		spin_unlock(&inode->i_lock);
 	} else if (rc) {
 		iget_failed(inode);
 		inode = ERR_PTR(rc);
@@ -1110,6 +1108,15 @@
 	goto out_close;
 }
 
+/* copied from fs/nfs/dir.c with small changes */
+static void
+cifs_drop_nlink(struct inode *inode)
+{
+	spin_lock(&inode->i_lock);
+	if (inode->i_nlink > 0)
+		drop_nlink(inode);
+	spin_unlock(&inode->i_lock);
+}
 
 /*
  * If dentry->d_inode is null (usually meaning the cached dentry
@@ -1166,13 +1173,13 @@
 psx_del_no_retry:
 	if (!rc) {
 		if (inode)
-			drop_nlink(inode);
+			cifs_drop_nlink(inode);
 	} else if (rc == -ENOENT) {
 		d_drop(dentry);
 	} else if (rc == -ETXTBSY) {
 		rc = cifs_rename_pending_delete(full_path, dentry, xid);
 		if (rc == 0)
-			drop_nlink(inode);
+			cifs_drop_nlink(inode);
 	} else if ((rc == -EACCES) && (dosattr == 0) && inode) {
 		attrs = kzalloc(sizeof(*attrs), GFP_KERNEL);
 		if (attrs == NULL) {
@@ -1241,9 +1248,10 @@
 	 * setting nlink not necessary except in cases where we failed to get it
 	 * from the server or was set bogus
 	 */
+	spin_lock(&dentry->d_inode->i_lock);
 	if ((dentry->d_inode) && (dentry->d_inode->i_nlink < 2))
 		set_nlink(dentry->d_inode, 2);
-
+	spin_unlock(&dentry->d_inode->i_lock);
 	mode &= ~current_umask();
 	/* must turn on setgid bit if parent dir has it */
 	if (inode->i_mode & S_ISGID)
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 09e4b3a..e6ce3b1 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -433,7 +433,9 @@
 	if (old_file->d_inode) {
 		cifsInode = CIFS_I(old_file->d_inode);
 		if (rc == 0) {
+			spin_lock(&old_file->d_inode->i_lock);
 			inc_nlink(old_file->d_inode);
+			spin_unlock(&old_file->d_inode->i_lock);
 /* BB should we make this contingent on superblock flag NOATIME? */
 /*			old_file->d_inode->i_ctime = CURRENT_TIME;*/
 			/* parent dir timestamps will update from srv
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index a4ff5d5..e4d3b99 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -52,7 +52,8 @@
 			cERROR(1, "Bad protocol string signature header %x",
 				  *(unsigned int *) hdr->ProtocolId);
 		if (mid != hdr->MessageId)
-			cERROR(1, "Mids do not match");
+			cERROR(1, "Mids do not match: %llu and %llu", mid,
+				  hdr->MessageId);
 	}
 	cERROR(1, "Bad SMB detected. The Mid=%llu", hdr->MessageId);
 	return 1;
@@ -107,7 +108,7 @@
 	 * ie Validate the wct via smb2_struct_sizes table above
 	 */
 
-	if (length < 2 + sizeof(struct smb2_hdr)) {
+	if (length < sizeof(struct smb2_pdu)) {
 		if ((length >= sizeof(struct smb2_hdr)) && (hdr->Status != 0)) {
 			pdu->StructureSize2 = 0;
 			/*
@@ -121,15 +122,15 @@
 		return 1;
 	}
 	if (len > CIFSMaxBufSize + MAX_SMB2_HDR_SIZE - 4) {
-		cERROR(1, "SMB length greater than maximum, mid=%lld", mid);
+		cERROR(1, "SMB length greater than maximum, mid=%llu", mid);
 		return 1;
 	}
 
 	if (check_smb2_hdr(hdr, mid))
 		return 1;
 
-	if (hdr->StructureSize != SMB2_HEADER_SIZE) {
-		cERROR(1, "Illegal structure size %d",
+	if (hdr->StructureSize != SMB2_HEADER_STRUCTURE_SIZE) {
+		cERROR(1, "Illegal structure size %u",
 			  le16_to_cpu(hdr->StructureSize));
 		return 1;
 	}
@@ -161,8 +162,9 @@
 	if (4 + len != clc_len) {
 		cFYI(1, "Calculated size %u length %u mismatch mid %llu",
 			clc_len, 4 + len, mid);
-		if (clc_len == 4 + len + 1) /* BB FIXME (fix samba) */
-			return 0; /* BB workaround Samba 3 bug SessSetup rsp */
+		/* server can return one byte more */
+		if (clc_len == 4 + len + 1)
+			return 0;
 		return 1;
 	}
 	return 0;
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index f37a1b4..c5fbfac 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -87,10 +87,6 @@
 
 #define SMB2_PROTO_NUMBER __constant_cpu_to_le32(0x424d53fe)
 
-#define SMB2_HEADER_SIZE __constant_le16_to_cpu(64)
-
-#define SMB2_ERROR_STRUCTURE_SIZE2 __constant_le16_to_cpu(9)
-
 /*
  * SMB2 Header Definition
  *
@@ -99,6 +95,9 @@
  * "PDU" :  "Protocol Data Unit" (ie a network "frame")
  *
  */
+
+#define SMB2_HEADER_STRUCTURE_SIZE __constant_le16_to_cpu(64)
+
 struct smb2_hdr {
 	__be32 smb2_buf_length;	/* big endian on wire */
 				/* length is only two or three bytes - with
@@ -140,6 +139,9 @@
  *  command code name for the struct. Note that structures must be packed.
  *
  */
+
+#define SMB2_ERROR_STRUCTURE_SIZE2 __constant_le16_to_cpu(9)
+
 struct smb2_err_rsp {
 	struct smb2_hdr hdr;
 	__le16 StructureSize;
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 83867ef..d9b639b 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -503,13 +503,16 @@
 	/* convert the length into a more usable form */
 	if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
 		struct kvec iov;
+		int rc = 0;
 
 		iov.iov_base = mid->resp_buf;
 		iov.iov_len = len;
 		/* FIXME: add code to kill session */
-		if (cifs_verify_signature(&iov, 1, server,
-					  mid->sequence_number + 1) != 0)
-			cERROR(1, "Unexpected SMB signature");
+		rc = cifs_verify_signature(&iov, 1, server,
+					   mid->sequence_number + 1);
+		if (rc)
+			cERROR(1, "SMB signature verification returned error = "
+			       "%d", rc);
 	}
 
 	/* BB special case reconnect tid and uid here? */
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index df0de27..e784a21 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -26,6 +26,7 @@
 	struct completion complete;
 
 	bio_init(&bio);
+	bio.bi_max_vecs = 1;
 	bio.bi_io_vec = &bio_vec;
 	bio_vec.bv_page = page;
 	bio_vec.bv_len = PAGE_SIZE;
@@ -95,12 +96,11 @@
 	struct address_space *mapping = super->s_mapping_inode->i_mapping;
 	struct bio *bio;
 	struct page *page;
-	struct request_queue *q = bdev_get_queue(sb->s_bdev);
-	unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9);
+	unsigned int max_pages;
 	int i;
 
-	if (max_pages > BIO_MAX_PAGES)
-		max_pages = BIO_MAX_PAGES;
+	max_pages = min(nr_pages, (size_t) bio_get_nr_vecs(super->s_bdev));
+
 	bio = bio_alloc(GFP_NOFS, max_pages);
 	BUG_ON(!bio);
 
@@ -190,12 +190,11 @@
 {
 	struct logfs_super *super = logfs_super(sb);
 	struct bio *bio;
-	struct request_queue *q = bdev_get_queue(sb->s_bdev);
-	unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9);
+	unsigned int max_pages;
 	int i;
 
-	if (max_pages > BIO_MAX_PAGES)
-		max_pages = BIO_MAX_PAGES;
+	max_pages = min(nr_pages, (size_t) bio_get_nr_vecs(super->s_bdev));
+
 	bio = bio_alloc(GFP_NOFS, max_pages);
 	BUG_ON(!bio);
 
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index a422f42..6984562 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -156,10 +156,26 @@
 	call_rcu(&inode->i_rcu, logfs_i_callback);
 }
 
+static void __logfs_destroy_meta_inode(struct inode *inode)
+{
+	struct logfs_inode *li = logfs_inode(inode);
+	BUG_ON(li->li_block);
+	call_rcu(&inode->i_rcu, logfs_i_callback);
+}
+
 static void logfs_destroy_inode(struct inode *inode)
 {
 	struct logfs_inode *li = logfs_inode(inode);
 
+	if (inode->i_ino < LOGFS_RESERVED_INOS) {
+		/*
+		 * The reserved inodes are never destroyed unless we are in
+		 * the unmount path.
+		 */
+		__logfs_destroy_meta_inode(inode);
+		return;
+	}
+
 	BUG_ON(list_empty(&li->li_freeing_list));
 	spin_lock(&logfs_inode_lock);
 	li->li_refcount--;
@@ -373,8 +389,8 @@
 {
 	struct logfs_super *super = logfs_super(sb);
 	/* kill the meta-inodes */
-	iput(super->s_master_inode);
 	iput(super->s_segfile_inode);
+	iput(super->s_master_inode);
 	iput(super->s_mapping_inode);
 }
 
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c
index 1e1c369..2a09b8d 100644
--- a/fs/logfs/journal.c
+++ b/fs/logfs/journal.c
@@ -565,7 +565,7 @@
 	index = ofs >> PAGE_SHIFT;
 	page_ofs = ofs & (PAGE_SIZE - 1);
 
-	page = find_lock_page(mapping, index);
+	page = find_or_create_page(mapping, index, GFP_NOFS);
 	BUG_ON(!page);
 	memcpy(wbuf, page_address(page) + page_ofs, super->s_writesize);
 	unlock_page(page);
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index f1cb512..5be0abe 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -2189,7 +2189,6 @@
 		return;
 	}
 
-	BUG_ON(inode->i_ino < LOGFS_RESERVED_INOS);
 	page = inode_to_page(inode);
 	BUG_ON(!page); /* FIXME: Use emergency page */
 	logfs_put_write_page(page);
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index e28d090..038da09 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c
@@ -886,7 +886,7 @@
 
 static void map_invalidatepage(struct page *page, unsigned long l)
 {
-	BUG();
+	return;
 }
 
 static int map_releasepage(struct page *page, gfp_t g)
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index ced3625..bfacf0d 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -118,7 +118,8 @@
 	.hdisplay = (hd), .hsync_start = (hss), .hsync_end = (hse), \
 	.htotal = (ht), .hskew = (hsk), .vdisplay = (vd), \
 	.vsync_start = (vss), .vsync_end = (vse), .vtotal = (vt), \
-	.vscan = (vs), .flags = (f), .vrefresh = 0
+	.vscan = (vs), .flags = (f), .vrefresh = 0, \
+	.base.type = DRM_MODE_OBJECT_MODE
 
 #define CRTC_INTERLACE_HALVE_V 0x1 /* halve V values for interlacing */
 
diff --git a/include/drm/drm_mode.h b/include/drm/drm_mode.h
index 5581980..3d6301b 100644
--- a/include/drm/drm_mode.h
+++ b/include/drm/drm_mode.h
@@ -359,8 +359,9 @@
 	struct drm_mode_modeinfo mode;
 };
 
-#define DRM_MODE_CURSOR_BO	(1<<0)
-#define DRM_MODE_CURSOR_MOVE	(1<<1)
+#define DRM_MODE_CURSOR_BO	0x01
+#define DRM_MODE_CURSOR_MOVE	0x02
+#define DRM_MODE_CURSOR_FLAGS	0x03
 
 /*
  * depending on the value in flags different members are used.
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 6043821..594b419 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -82,10 +82,18 @@
 	__x - (__x % (y));				\
 }							\
 )
+
+/*
+ * Divide positive or negative dividend by positive divisor and round
+ * to closest integer. Result is undefined for negative divisors.
+ */
 #define DIV_ROUND_CLOSEST(x, divisor)(			\
 {							\
-	typeof(divisor) __divisor = divisor;		\
-	(((x) + ((__divisor) / 2)) / (__divisor));	\
+	typeof(x) __x = x;				\
+	typeof(divisor) __d = divisor;			\
+	(((typeof(x))-1) >= 0 || (__x) >= 0) ?		\
+		(((__x) + ((__d) / 2)) / (__d)) :	\
+		(((__x) - ((__d) / 2)) / (__d));	\
 }							\
 )
 
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 111aca5..4b27f9f 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -239,6 +239,7 @@
 #define MMC_QUIRK_BLK_NO_CMD23	(1<<7)		/* Avoid CMD23 for regular multiblock */
 #define MMC_QUIRK_BROKEN_BYTE_MODE_512 (1<<8)	/* Avoid sending 512 bytes in */
 #define MMC_QUIRK_LONG_READ_TIME (1<<9)		/* Data read time > CSD says */
+#define MMC_QUIRK_SEC_ERASE_TRIM_BROKEN (1<<10)	/* Skip secure for erase/trim */
 						/* byte mode */
 	unsigned int    poweroff_notify_state;	/* eMMC4.5 notify feature */
 #define MMC_NO_POWER_NOTIFICATION	0
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index fc35260..6b4565c 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2149,7 +2149,7 @@
 #define PCI_DEVICE_ID_TIGON3_5704S	0x16a8
 #define PCI_DEVICE_ID_NX2_57800_VF	0x16a9
 #define PCI_DEVICE_ID_NX2_5706S		0x16aa
-#define PCI_DEVICE_ID_NX2_57840_MF	0x16ab
+#define PCI_DEVICE_ID_NX2_57840_MF	0x16a4
 #define PCI_DEVICE_ID_NX2_5708S		0x16ac
 #define PCI_DEVICE_ID_NX2_57840_VF	0x16ad
 #define PCI_DEVICE_ID_NX2_57810_MF	0x16ae
diff --git a/include/linux/time.h b/include/linux/time.h
index b0bbd8f..b51e664 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -125,6 +125,13 @@
 	/* Can't have more nanoseconds then a second */
 	if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
 		return false;
+	return true;
+}
+
+static inline bool timespec_valid_strict(const struct timespec *ts)
+{
+	if (!timespec_valid(ts))
+		return false;
 	/* Disallow values that could overflow ktime_t */
 	if ((unsigned long long)ts->tv_sec >= KTIME_SEC_MAX)
 		return false;
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 802de56..2f322c3 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -136,6 +136,22 @@
 		postrcu;						\
 	} while (0)
 
+#ifndef MODULE
+#define __DECLARE_TRACE_RCU(name, proto, args, cond, data_proto, data_args)	\
+	static inline void trace_##name##_rcuidle(proto)		\
+	{								\
+		if (static_key_false(&__tracepoint_##name.key))		\
+			__DO_TRACE(&__tracepoint_##name,		\
+				TP_PROTO(data_proto),			\
+				TP_ARGS(data_args),			\
+				TP_CONDITION(cond),			\
+				rcu_idle_exit(),			\
+				rcu_idle_enter());			\
+	}
+#else
+#define __DECLARE_TRACE_RCU(name, proto, args, cond, data_proto, data_args)
+#endif
+
 /*
  * Make sure the alignment of the structure in the __tracepoints section will
  * not add unwanted padding between the beginning of the section and the
@@ -151,16 +167,8 @@
 				TP_ARGS(data_args),			\
 				TP_CONDITION(cond),,);			\
 	}								\
-	static inline void trace_##name##_rcuidle(proto)		\
-	{								\
-		if (static_key_false(&__tracepoint_##name.key))		\
-			__DO_TRACE(&__tracepoint_##name,		\
-				TP_PROTO(data_proto),			\
-				TP_ARGS(data_args),			\
-				TP_CONDITION(cond),			\
-				rcu_idle_exit(),			\
-				rcu_idle_enter());			\
-	}								\
+	__DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args),		\
+		PARAMS(cond), PARAMS(data_proto), PARAMS(data_args))	\
 	static inline int						\
 	register_trace_##name(void (*probe)(data_proto), void *data)	\
 	{								\
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index e1ce104..4a045cd 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -18,6 +18,7 @@
 	u16 ctmask;		/* bitmask of ct events to be delivered */
 	u16 expmask;		/* bitmask of expect events to be delivered */
 	u32 pid;		/* netlink pid of destroyer */
+	struct timer_list timeout;
 };
 
 static inline struct nf_conntrack_ecache *
diff --git a/kernel/Makefile b/kernel/Makefile
index c0cc67a..29d993b 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -98,7 +98,7 @@
 obj-$(CONFIG_BINFMT_ELF_FDPIC) += elfcore.o
 obj-$(CONFIG_FUNCTION_TRACER) += trace/
 obj-$(CONFIG_TRACING) += trace/
-obj-$(CONFIG_X86_DS) += trace/
+obj-$(CONFIG_TRACE_CLOCK) += trace/
 obj-$(CONFIG_RING_BUFFER) += trace/
 obj-$(CONFIG_TRACEPOINTS) += trace/
 obj-$(CONFIG_IRQ_WORK) += irq_work.o
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 35b4315..098f396 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1418,9 +1418,6 @@
 		/* Given address is not on the instruction boundary */
 		if ((unsigned long)p->addr != ftrace_addr)
 			return -EILSEQ;
-		/* break_handler (jprobe) can not work with ftrace */
-		if (p->break_handler)
-			return -EINVAL;
 		p->flags |= KPROBE_FLAG_FTRACE;
 #else	/* !KPROBES_CAN_USE_FTRACE */
 		return -EINVAL;
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 0c1485e..34e5eac 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -428,7 +428,7 @@
 	struct timespec ts_delta, xt;
 	unsigned long flags;
 
-	if (!timespec_valid(tv))
+	if (!timespec_valid_strict(tv))
 		return -EINVAL;
 
 	write_seqlock_irqsave(&tk->lock, flags);
@@ -476,7 +476,7 @@
 
 	/* Make sure the proposed value is valid */
 	tmp = timespec_add(tk_xtime(tk),  *ts);
-	if (!timespec_valid(&tmp)) {
+	if (!timespec_valid_strict(&tmp)) {
 		ret = -EINVAL;
 		goto error;
 	}
@@ -659,7 +659,7 @@
 	struct timespec now, boot, tmp;
 
 	read_persistent_clock(&now);
-	if (!timespec_valid(&now)) {
+	if (!timespec_valid_strict(&now)) {
 		pr_warn("WARNING: Persistent clock returned invalid value!\n"
 			"         Check your CMOS/BIOS settings.\n");
 		now.tv_sec = 0;
@@ -667,7 +667,7 @@
 	}
 
 	read_boot_clock(&boot);
-	if (!timespec_valid(&boot)) {
+	if (!timespec_valid_strict(&boot)) {
 		pr_warn("WARNING: Boot clock returned invalid value!\n"
 			"         Check your CMOS/BIOS settings.\n");
 		boot.tv_sec = 0;
@@ -713,7 +713,7 @@
 static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
 							struct timespec *delta)
 {
-	if (!timespec_valid(delta)) {
+	if (!timespec_valid_strict(delta)) {
 		printk(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid "
 					"sleep delta value!\n");
 		return;
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 9301a0e..4cea4f4 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -62,8 +62,12 @@
 config TRACER_MAX_TRACE
 	bool
 
+config TRACE_CLOCK
+	bool
+
 config RING_BUFFER
 	bool
+	select TRACE_CLOCK
 
 config FTRACE_NMI_ENTER
        bool
@@ -114,6 +118,7 @@
 	select NOP_TRACER
 	select BINARY_PRINTF
 	select EVENT_TRACING
+	select TRACE_CLOCK
 
 config GENERIC_TRACER
 	bool
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 8370908..d7e2068 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -19,11 +19,7 @@
 
 CFLAGS_trace_events_filter.o := -I$(src)
 
-#
-# Make the trace clocks available generally: it's infrastructure
-# relied on by ptrace for example:
-#
-obj-y += trace_clock.o
+obj-$(CONFIG_TRACE_CLOCK) += trace_clock.o
 
 obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
 obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 6825d83..bbb0e63 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1646,9 +1646,11 @@
 		event_test_stuff();
 
 		ret = __ftrace_set_clr_event(NULL, system->name, NULL, 0);
-		if (WARN_ON_ONCE(ret))
+		if (WARN_ON_ONCE(ret)) {
 			pr_warning("error disabling system %s\n",
 				   system->name);
+			continue;
+		}
 
 		pr_cont("OK\n");
 	}
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index bd92431..4ada3be 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2562,7 +2562,7 @@
 		break;
 
 	default:
-		BUG();
+		return -EINVAL;
 	}
 
 	l = strlen(policy_modes[mode]);
diff --git a/mm/slab.c b/mm/slab.c
index f8b0d53..811af03 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3260,6 +3260,7 @@
 
 		/* cache_grow can reenable interrupts, then ac could change. */
 		ac = cpu_cache_get(cachep);
+		node = numa_mem_id();
 
 		/* no objects in sight? abort */
 		if (!x && (ac->avail == 0 || force_refill))
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 346b1eb..e4ba3e7 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -168,24 +168,16 @@
 	struct napi_struct *napi;
 	int budget = 16;
 
-	WARN_ON_ONCE(!irqs_disabled());
-
 	list_for_each_entry(napi, &dev->napi_list, dev_list) {
-		local_irq_enable();
 		if (napi->poll_owner != smp_processor_id() &&
 		    spin_trylock(&napi->poll_lock)) {
-			rcu_read_lock_bh();
 			budget = poll_one_napi(rcu_dereference_bh(dev->npinfo),
 					       napi, budget);
-			rcu_read_unlock_bh();
 			spin_unlock(&napi->poll_lock);
 
-			if (!budget) {
-				local_irq_disable();
+			if (!budget)
 				break;
-			}
 		}
-		local_irq_disable();
 	}
 }
 
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 8eec8f4..ebdf06f 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -124,6 +124,8 @@
 static struct kmem_cache *mrt_cachep __read_mostly;
 
 static struct mr_table *ipmr_new_table(struct net *net, u32 id);
+static void ipmr_free_table(struct mr_table *mrt);
+
 static int ip_mr_forward(struct net *net, struct mr_table *mrt,
 			 struct sk_buff *skb, struct mfc_cache *cache,
 			 int local);
@@ -131,6 +133,7 @@
 			     struct sk_buff *pkt, vifi_t vifi, int assert);
 static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
 			      struct mfc_cache *c, struct rtmsg *rtm);
+static void mroute_clean_tables(struct mr_table *mrt);
 static void ipmr_expire_process(unsigned long arg);
 
 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
@@ -271,7 +274,7 @@
 
 	list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
 		list_del(&mrt->list);
-		kfree(mrt);
+		ipmr_free_table(mrt);
 	}
 	fib_rules_unregister(net->ipv4.mr_rules_ops);
 }
@@ -299,7 +302,7 @@
 
 static void __net_exit ipmr_rules_exit(struct net *net)
 {
-	kfree(net->ipv4.mrt);
+	ipmr_free_table(net->ipv4.mrt);
 }
 #endif
 
@@ -336,6 +339,13 @@
 	return mrt;
 }
 
+static void ipmr_free_table(struct mr_table *mrt)
+{
+	del_timer_sync(&mrt->ipmr_expire_timer);
+	mroute_clean_tables(mrt);
+	kfree(mrt);
+}
+
 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
 
 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index 4ad9cf1..9c87cde 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -502,7 +502,10 @@
 		ret = nf_ct_expect_related(rtcp_exp);
 		if (ret == 0)
 			break;
-		else if (ret != -EBUSY) {
+		else if (ret == -EBUSY) {
+			nf_ct_unexpect_related(rtp_exp);
+			continue;
+		} else if (ret < 0) {
 			nf_ct_unexpect_related(rtp_exp);
 			port = 0;
 			break;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index fd9ecb5..82cf2a7 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -934,12 +934,14 @@
 	if (mtu < ip_rt_min_pmtu)
 		mtu = ip_rt_min_pmtu;
 
+	rcu_read_lock();
 	if (fib_lookup(dev_net(rt->dst.dev), fl4, &res) == 0) {
 		struct fib_nh *nh = &FIB_RES_NH(res);
 
 		update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
 				      jiffies + ip_rt_mtu_expires);
 	}
+	rcu_read_unlock();
 	return mtu;
 }
 
@@ -956,7 +958,7 @@
 		dst->obsolete = DST_OBSOLETE_KILL;
 	} else {
 		rt->rt_pmtu = mtu;
-		dst_set_expires(&rt->dst, ip_rt_mtu_expires);
+		rt->dst.expires = max(1UL, jiffies + ip_rt_mtu_expires);
 	}
 }
 
@@ -1263,7 +1265,7 @@
 {
 	struct rtable *rt = (struct rtable *) dst;
 
-	if (dst->flags & DST_NOCACHE) {
+	if (!list_empty(&rt->rt_uncached)) {
 		spin_lock_bh(&rt_uncached_lock);
 		list_del(&rt->rt_uncached);
 		spin_unlock_bh(&rt_uncached_lock);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 85308b9..6e38c6c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2926,13 +2926,14 @@
  * tcp_xmit_retransmit_queue().
  */
 static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
-				  int newly_acked_sacked, bool is_dupack,
+				  int prior_sacked, bool is_dupack,
 				  int flag)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
 				    (tcp_fackets_out(tp) > tp->reordering));
+	int newly_acked_sacked = 0;
 	int fast_rexmit = 0;
 
 	if (WARN_ON(!tp->packets_out && tp->sacked_out))
@@ -2992,6 +2993,7 @@
 				tcp_add_reno_sack(sk);
 		} else
 			do_lost = tcp_try_undo_partial(sk, pkts_acked);
+		newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked;
 		break;
 	case TCP_CA_Loss:
 		if (flag & FLAG_DATA_ACKED)
@@ -3013,6 +3015,7 @@
 			if (is_dupack)
 				tcp_add_reno_sack(sk);
 		}
+		newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked;
 
 		if (icsk->icsk_ca_state <= TCP_CA_Disorder)
 			tcp_try_undo_dsack(sk);
@@ -3590,7 +3593,6 @@
 	int prior_packets;
 	int prior_sacked = tp->sacked_out;
 	int pkts_acked = 0;
-	int newly_acked_sacked = 0;
 	bool frto_cwnd = false;
 
 	/* If the ack is older than previous acks
@@ -3666,8 +3668,6 @@
 	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
 
 	pkts_acked = prior_packets - tp->packets_out;
-	newly_acked_sacked = (prior_packets - prior_sacked) -
-			     (tp->packets_out - tp->sacked_out);
 
 	if (tp->frto_counter)
 		frto_cwnd = tcp_process_frto(sk, flag);
@@ -3681,7 +3681,7 @@
 		    tcp_may_raise_cwnd(sk, flag))
 			tcp_cong_avoid(sk, ack, prior_in_flight);
 		is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
-		tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked,
+		tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
 				      is_dupack, flag);
 	} else {
 		if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
@@ -3698,7 +3698,7 @@
 no_queue:
 	/* If data was DSACKed, see if we can undo a cwnd reduction. */
 	if (flag & FLAG_DSACKING_ACK)
-		tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked,
+		tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
 				      is_dupack, flag);
 	/* If this ack opens up a zero window, clear backoff.  It was
 	 * being used to time the probes, and is probably far higher than
@@ -3718,8 +3718,7 @@
 	 */
 	if (TCP_SKB_CB(skb)->sacked) {
 		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
-		newly_acked_sacked = tp->sacked_out - prior_sacked;
-		tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked,
+		tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
 				      is_dupack, flag);
 	}
 
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 6dc7fd3..282f372 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -167,8 +167,6 @@
 	struct esp_data *esp = x->data;
 
 	/* skb is pure payload to encrypt */
-	err = -ENOMEM;
-
 	aead = esp->aead;
 	alen = crypto_aead_authsize(aead);
 
@@ -203,8 +201,10 @@
 	}
 
 	tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen);
-	if (!tmp)
+	if (!tmp) {
+		err = -ENOMEM;
 		goto error;
+	}
 
 	seqhi = esp_tmp_seqhi(tmp);
 	iv = esp_tmp_iv(aead, tmp, seqhilen);
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 393355d..513cab0 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1347,11 +1347,10 @@
 	/* Remove from tunnel list */
 	spin_lock_bh(&pn->l2tp_tunnel_list_lock);
 	list_del_rcu(&tunnel->list);
+	kfree_rcu(tunnel, rcu);
 	spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
-	synchronize_rcu();
 
 	atomic_dec(&l2tp_tunnel_count);
-	kfree(tunnel);
 }
 
 /* Create a socket for the tunnel, if one isn't set up by
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index a38ec6c..56d583e 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -163,6 +163,7 @@
 
 struct l2tp_tunnel {
 	int			magic;		/* Should be L2TP_TUNNEL_MAGIC */
+	struct rcu_head rcu;
 	rwlock_t		hlist_lock;	/* protect session_hlist */
 	struct hlist_head	session_hlist[L2TP_HASH_SIZE];
 						/* hashed list of sessions,
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index acf712f..c5e8c9c 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1811,37 +1811,31 @@
 			meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr,
 					sdata, NULL, NULL);
 		} else {
-			int is_mesh_mcast = 1;
-			const u8 *mesh_da;
+			/* DS -> MBSS (802.11-2012 13.11.3.3).
+			 * For unicast with unknown forwarding information,
+			 * destination might be in the MBSS or if that fails
+			 * forwarded to another mesh gate. In either case
+			 * resolution will be handled in ieee80211_xmit(), so
+			 * leave the original DA. This also works for mcast */
+			const u8 *mesh_da = skb->data;
 
-			if (is_multicast_ether_addr(skb->data))
-				/* DA TA mSA AE:SA */
-				mesh_da = skb->data;
-			else {
-				static const u8 bcast[ETH_ALEN] =
-					{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
-				if (mppath) {
-					/* RA TA mDA mSA AE:DA SA */
-					mesh_da = mppath->mpp;
-					is_mesh_mcast = 0;
-				} else if (mpath) {
-					mesh_da = mpath->dst;
-					is_mesh_mcast = 0;
-				} else {
-					/* DA TA mSA AE:SA */
-					mesh_da = bcast;
-				}
-			}
+			if (mppath)
+				mesh_da = mppath->mpp;
+			else if (mpath)
+				mesh_da = mpath->dst;
+			rcu_read_unlock();
+
 			hdrlen = ieee80211_fill_mesh_addresses(&hdr, &fc,
 					mesh_da, sdata->vif.addr);
-			rcu_read_unlock();
-			if (is_mesh_mcast)
+			if (is_multicast_ether_addr(mesh_da))
+				/* DA TA mSA AE:SA */
 				meshhdrlen =
 					ieee80211_new_mesh_header(&mesh_hdr,
 							sdata,
 							skb->data + ETH_ALEN,
 							NULL);
 			else
+				/* RA TA mDA mSA AE:DA SA */
 				meshhdrlen =
 					ieee80211_new_mesh_header(&mesh_hdr,
 							sdata,
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 72bf32a..f51013c 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1171,8 +1171,10 @@
 		goto out_err;
 	}
 	svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
-	if (!svc->stats.cpustats)
+	if (!svc->stats.cpustats) {
+		ret = -ENOMEM;
 		goto out_err;
+	}
 
 	/* I'm the first user of the service */
 	atomic_set(&svc->usecnt, 0);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index cf48755..2ceec64 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -249,12 +249,15 @@
 {
 	struct nf_conn *ct = (void *)ul_conntrack;
 	struct net *net = nf_ct_net(ct);
+	struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct);
+
+	BUG_ON(ecache == NULL);
 
 	if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) {
 		/* bad luck, let's retry again */
-		ct->timeout.expires = jiffies +
+		ecache->timeout.expires = jiffies +
 			(random32() % net->ct.sysctl_events_retry_timeout);
-		add_timer(&ct->timeout);
+		add_timer(&ecache->timeout);
 		return;
 	}
 	/* we've got the event delivered, now it's dying */
@@ -268,6 +271,9 @@
 void nf_ct_insert_dying_list(struct nf_conn *ct)
 {
 	struct net *net = nf_ct_net(ct);
+	struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct);
+
+	BUG_ON(ecache == NULL);
 
 	/* add this conntrack to the dying list */
 	spin_lock_bh(&nf_conntrack_lock);
@@ -275,10 +281,10 @@
 			     &net->ct.dying);
 	spin_unlock_bh(&nf_conntrack_lock);
 	/* set a new timer to retry event delivery */
-	setup_timer(&ct->timeout, death_by_event, (unsigned long)ct);
-	ct->timeout.expires = jiffies +
+	setup_timer(&ecache->timeout, death_by_event, (unsigned long)ct);
+	ecache->timeout.expires = jiffies +
 		(random32() % net->ct.sysctl_events_retry_timeout);
-	add_timer(&ct->timeout);
+	add_timer(&ecache->timeout);
 }
 EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list);
 
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index da4fc37..9807f32 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2790,7 +2790,8 @@
 		goto err_unreg_subsys;
 	}
 
-	if (register_pernet_subsys(&ctnetlink_net_ops)) {
+	ret = register_pernet_subsys(&ctnetlink_net_ops);
+	if (ret < 0) {
 		pr_err("ctnetlink_init: cannot register pernet operations\n");
 		goto err_unreg_exp_subsys;
 	}
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 169ab59..14e2f39 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -480,7 +480,7 @@
 	}
 
 	if (indev && skb_mac_header_was_set(skb)) {
-		if (nla_put_be32(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)) ||
+		if (nla_put_be16(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)) ||
 		    nla_put_be16(inst->skb, NFULA_HWLEN,
 				 htons(skb->dev->hard_header_len)) ||
 		    nla_put(inst->skb, NFULA_HWHEADER, skb->dev->hard_header_len,
@@ -996,8 +996,10 @@
 
 #ifdef CONFIG_PROC_FS
 	if (!proc_create("nfnetlink_log", 0440,
-			 proc_net_netfilter, &nful_file_ops))
+			 proc_net_netfilter, &nful_file_ops)) {
+		status = -ENOMEM;
 		goto cleanup_logger;
+	}
 #endif
 	return status;
 
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 1445d73..5270238 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1373,7 +1373,8 @@
 		dst_pid = addr->nl_pid;
 		dst_group = ffs(addr->nl_groups);
 		err =  -EPERM;
-		if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND))
+		if ((dst_group || dst_pid) &&
+		    !netlink_capable(sock, NL_NONROOT_SEND))
 			goto out;
 	} else {
 		dst_pid = nlk->dst_pid;
@@ -2147,6 +2148,7 @@
 	rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners);
 	nl_table[NETLINK_USERSOCK].module = THIS_MODULE;
 	nl_table[NETLINK_USERSOCK].registered = 1;
+	nl_table[NETLINK_USERSOCK].nl_nonroot = NL_NONROOT_SEND;
 
 	netlink_table_ungrab();
 }
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index aee7196..c5c9e2a 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1273,7 +1273,7 @@
 	spin_unlock(&f->lock);
 }
 
-bool match_fanout_group(struct packet_type *ptype, struct sock * sk)
+static bool match_fanout_group(struct packet_type *ptype, struct sock * sk)
 {
 	if (ptype->af_packet_priv == (void*)((struct packet_sock *)sk)->fanout)
 		return true;
diff --git a/net/socket.c b/net/socket.c
index a5471f8..edc3c4a 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2604,7 +2604,7 @@
 	err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
 	set_fs(old_fs);
 	if (!err)
-		err = compat_put_timeval(up, &ktv);
+		err = compat_put_timeval(&ktv, up);
 
 	return err;
 }
@@ -2620,7 +2620,7 @@
 	err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
 	set_fs(old_fs);
 	if (!err)
-		err = compat_put_timespec(up, &kts);
+		err = compat_put_timespec(&kts, up);
 
 	return err;
 }
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 87cd0e4..210be48 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1994,8 +1994,10 @@
 		goto error;
 
 	x->outer_mode = xfrm_get_mode(x->props.mode, family);
-	if (x->outer_mode == NULL)
+	if (x->outer_mode == NULL) {
+		err = -EPROTONOSUPPORT;
 		goto error;
+	}
 
 	if (init_replay) {
 		err = xfrm_init_replay(x);
diff --git a/scripts/Makefile.fwinst b/scripts/Makefile.fwinst
index 6bf8e87..c3f69ae2 100644
--- a/scripts/Makefile.fwinst
+++ b/scripts/Makefile.fwinst
@@ -42,7 +42,7 @@
 $(installed-fw-dirs):
 	$(call cmd,mkdir)
 
-$(installed-fw): $(INSTALL_FW_PATH)/%: $(obj)/% | $(INSTALL_FW_PATH)/$$(dir %)
+$(installed-fw): $(INSTALL_FW_PATH)/%: $(obj)/% | $$(dir $(INSTALL_FW_PATH)/%)
 	$(call cmd,install)
 
 PHONY +=  __fw_install __fw_modinst FORCE
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index 4629038c..4235a63 100644
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -211,7 +211,7 @@
 
 	if ! cmp -s System.map .tmp_System.map; then
 		echo >&2 Inconsistent kallsyms data
-		echo >&2 echo Try "make KALLSYMS_EXTRA_PASS=1" as a workaround
+		echo >&2 Try "make KALLSYMS_EXTRA_PASS=1" as a workaround
 		cleanup
 		exit 1
 	fi
diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index f560051..f25c24c 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -1209,6 +1209,9 @@
 	kfree(codec);
 }
 
+static bool snd_hda_codec_get_supported_ps(struct hda_codec *codec,
+				hda_nid_t fg, unsigned int power_state);
+
 static void hda_set_power_state(struct hda_codec *codec, hda_nid_t fg,
 				unsigned int power_state);
 
@@ -1317,6 +1320,10 @@
 					   AC_VERB_GET_SUBSYSTEM_ID, 0);
 	}
 
+	codec->epss = snd_hda_codec_get_supported_ps(codec,
+					codec->afg ? codec->afg : codec->mfg,
+					AC_PWRST_EPSS);
+
 	/* power-up all before initialization */
 	hda_set_power_state(codec,
 			    codec->afg ? codec->afg : codec->mfg,
@@ -3543,8 +3550,7 @@
 	/* this delay seems necessary to avoid click noise at power-down */
 	if (power_state == AC_PWRST_D3) {
 		/* transition time less than 10ms for power down */
-		bool epss = snd_hda_codec_get_supported_ps(codec, fg, AC_PWRST_EPSS);
-		msleep(epss ? 10 : 100);
+		msleep(codec->epss ? 10 : 100);
 	}
 
 	/* repeat power states setting at most 10 times*/
diff --git a/sound/pci/hda/hda_codec.h b/sound/pci/hda/hda_codec.h
index 7fbc1bc..e5a7e19 100644
--- a/sound/pci/hda/hda_codec.h
+++ b/sound/pci/hda/hda_codec.h
@@ -862,6 +862,7 @@
 	unsigned int ignore_misc_bit:1; /* ignore MISC_NO_PRESENCE bit */
 	unsigned int no_jack_detect:1;	/* Machine has no jack-detection */
 	unsigned int pcm_format_first:1; /* PCM format must be set first */
+	unsigned int epss:1;		/* supporting EPSS? */
 #ifdef CONFIG_SND_HDA_POWER_SAVE
 	unsigned int power_on :1;	/* current (global) power-state */
 	int power_transition;	/* power-state in transition */
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index ea5775a..6f806d3 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -4543,6 +4543,9 @@
 	struct auto_pin_cfg *cfg = &spec->autocfg;
 	int i;
 
+	if (cfg->speaker_outs == 0)
+		return;
+
 	for (i = 0; i < cfg->line_outs; i++) {
 		if (presence)
 			break;
@@ -5531,6 +5534,7 @@
 		snd_hda_codec_set_pincfg(codec, 0xf, 0x2181205e);
 	}
 
+	codec->epss = 0; /* longer delay needed for D3 */
 	codec->no_trigger_sense = 1;
 	codec->spec = spec;
 
diff --git a/sound/usb/card.c b/sound/usb/card.c
index d5b5c33..4a469f0 100644
--- a/sound/usb/card.c
+++ b/sound/usb/card.c
@@ -553,7 +553,7 @@
 				     struct snd_usb_audio *chip)
 {
 	struct snd_card *card;
-	struct list_head *p;
+	struct list_head *p, *n;
 
 	if (chip == (void *)-1L)
 		return;
@@ -570,7 +570,7 @@
 			snd_usb_stream_disconnect(p);
 		}
 		/* release the endpoint resources */
-		list_for_each(p, &chip->ep_list) {
+		list_for_each_safe(p, n, &chip->ep_list) {
 			snd_usb_endpoint_free(p);
 		}
 		/* release the midi resources */
diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c
index c411812..d6e2bb4 100644
--- a/sound/usb/endpoint.c
+++ b/sound/usb/endpoint.c
@@ -141,7 +141,7 @@
  *
  * For implicit feedback, next_packet_size() is unused.
  */
-static int next_packet_size(struct snd_usb_endpoint *ep)
+int snd_usb_endpoint_next_packet_size(struct snd_usb_endpoint *ep)
 {
 	unsigned long flags;
 	int ret;
@@ -177,15 +177,6 @@
 		ep->retire_data_urb(ep->data_subs, urb);
 }
 
-static void prepare_outbound_urb_sizes(struct snd_usb_endpoint *ep,
-				       struct snd_urb_ctx *ctx)
-{
-	int i;
-
-	for (i = 0; i < ctx->packets; ++i)
-		ctx->packet_size[i] = next_packet_size(ep);
-}
-
 /*
  * Prepare a PLAYBACK urb for submission to the bus.
  */
@@ -370,7 +361,6 @@
 			goto exit_clear;
 		}
 
-		prepare_outbound_urb_sizes(ep, ctx);
 		prepare_outbound_urb(ep, ctx);
 	} else {
 		retire_inbound_urb(ep, ctx);
@@ -799,7 +789,9 @@
 /**
  * snd_usb_endpoint_start: start an snd_usb_endpoint
  *
- * @ep: the endpoint to start
+ * @ep:		the endpoint to start
+ * @can_sleep:	flag indicating whether the operation is executed in
+ * 		non-atomic context
  *
  * A call to this function will increment the use count of the endpoint.
  * In case it is not already running, the URBs for this endpoint will be
@@ -809,7 +801,7 @@
  *
  * Returns an error if the URB submission failed, 0 in all other cases.
  */
-int snd_usb_endpoint_start(struct snd_usb_endpoint *ep)
+int snd_usb_endpoint_start(struct snd_usb_endpoint *ep, int can_sleep)
 {
 	int err;
 	unsigned int i;
@@ -821,6 +813,11 @@
 	if (++ep->use_count != 1)
 		return 0;
 
+	/* just to be sure */
+	deactivate_urbs(ep, 0, can_sleep);
+	if (can_sleep)
+		wait_clear_urbs(ep);
+
 	ep->active_mask = 0;
 	ep->unlink_mask = 0;
 	ep->phase = 0;
@@ -850,7 +847,6 @@
 			goto __error;
 
 		if (usb_pipeout(ep->pipe)) {
-			prepare_outbound_urb_sizes(ep, urb->context);
 			prepare_outbound_urb(ep, urb->context);
 		} else {
 			prepare_inbound_urb(ep, urb->context);
diff --git a/sound/usb/endpoint.h b/sound/usb/endpoint.h
index ee2723f..cbbbdf2 100644
--- a/sound/usb/endpoint.h
+++ b/sound/usb/endpoint.h
@@ -13,7 +13,7 @@
 				struct audioformat *fmt,
 				struct snd_usb_endpoint *sync_ep);
 
-int  snd_usb_endpoint_start(struct snd_usb_endpoint *ep);
+int  snd_usb_endpoint_start(struct snd_usb_endpoint *ep, int can_sleep);
 void snd_usb_endpoint_stop(struct snd_usb_endpoint *ep,
 			   int force, int can_sleep, int wait);
 int  snd_usb_endpoint_activate(struct snd_usb_endpoint *ep);
@@ -21,6 +21,7 @@
 void snd_usb_endpoint_free(struct list_head *head);
 
 int snd_usb_endpoint_implict_feedback_sink(struct snd_usb_endpoint *ep);
+int snd_usb_endpoint_next_packet_size(struct snd_usb_endpoint *ep);
 
 void snd_usb_handle_sync_urb(struct snd_usb_endpoint *ep,
 			     struct snd_usb_endpoint *sender,
diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c
index 62ec808..fd5e982 100644
--- a/sound/usb/pcm.c
+++ b/sound/usb/pcm.c
@@ -212,7 +212,7 @@
 	}
 }
 
-static int start_endpoints(struct snd_usb_substream *subs)
+static int start_endpoints(struct snd_usb_substream *subs, int can_sleep)
 {
 	int err;
 
@@ -225,7 +225,7 @@
 		snd_printdd(KERN_DEBUG "Starting data EP @%p\n", ep);
 
 		ep->data_subs = subs;
-		err = snd_usb_endpoint_start(ep);
+		err = snd_usb_endpoint_start(ep, can_sleep);
 		if (err < 0) {
 			clear_bit(SUBSTREAM_FLAG_DATA_EP_STARTED, &subs->flags);
 			return err;
@@ -236,10 +236,25 @@
 	    !test_and_set_bit(SUBSTREAM_FLAG_SYNC_EP_STARTED, &subs->flags)) {
 		struct snd_usb_endpoint *ep = subs->sync_endpoint;
 
+		if (subs->data_endpoint->iface != subs->sync_endpoint->iface ||
+		    subs->data_endpoint->alt_idx != subs->sync_endpoint->alt_idx) {
+			err = usb_set_interface(subs->dev,
+						subs->sync_endpoint->iface,
+						subs->sync_endpoint->alt_idx);
+			if (err < 0) {
+				snd_printk(KERN_ERR
+					   "%d:%d:%d: cannot set interface (%d)\n",
+					   subs->dev->devnum,
+					   subs->sync_endpoint->iface,
+					   subs->sync_endpoint->alt_idx, err);
+				return -EIO;
+			}
+		}
+
 		snd_printdd(KERN_DEBUG "Starting sync EP @%p\n", ep);
 
 		ep->sync_slave = subs->data_endpoint;
-		err = snd_usb_endpoint_start(ep);
+		err = snd_usb_endpoint_start(ep, can_sleep);
 		if (err < 0) {
 			clear_bit(SUBSTREAM_FLAG_SYNC_EP_STARTED, &subs->flags);
 			return err;
@@ -544,13 +559,10 @@
 	subs->last_frame_number = 0;
 	runtime->delay = 0;
 
-	/* clear the pending deactivation on the target EPs */
-	deactivate_endpoints(subs);
-
 	/* for playback, submit the URBs now; otherwise, the first hwptr_done
 	 * updates for all URBs would happen at the same time when starting */
 	if (subs->direction == SNDRV_PCM_STREAM_PLAYBACK)
-		return start_endpoints(subs);
+		return start_endpoints(subs, 1);
 
 	return 0;
 }
@@ -1032,6 +1044,7 @@
 				 struct urb *urb)
 {
 	struct snd_pcm_runtime *runtime = subs->pcm_substream->runtime;
+	struct snd_usb_endpoint *ep = subs->data_endpoint;
 	struct snd_urb_ctx *ctx = urb->context;
 	unsigned int counts, frames, bytes;
 	int i, stride, period_elapsed = 0;
@@ -1043,7 +1056,11 @@
 	urb->number_of_packets = 0;
 	spin_lock_irqsave(&subs->lock, flags);
 	for (i = 0; i < ctx->packets; i++) {
-		counts = ctx->packet_size[i];
+		if (ctx->packet_size[i])
+			counts = ctx->packet_size[i];
+		else
+			counts = snd_usb_endpoint_next_packet_size(ep);
+
 		/* set up descriptor */
 		urb->iso_frame_desc[i].offset = frames * stride;
 		urb->iso_frame_desc[i].length = counts * stride;
@@ -1094,7 +1111,16 @@
 	subs->hwptr_done += bytes;
 	if (subs->hwptr_done >= runtime->buffer_size * stride)
 		subs->hwptr_done -= runtime->buffer_size * stride;
+
+	/* update delay with exact number of samples queued */
+	runtime->delay = subs->last_delay;
 	runtime->delay += frames;
+	subs->last_delay = runtime->delay;
+
+	/* realign last_frame_number */
+	subs->last_frame_number = usb_get_current_frame_number(subs->dev);
+	subs->last_frame_number &= 0xFF; /* keep 8 LSBs */
+
 	spin_unlock_irqrestore(&subs->lock, flags);
 	urb->transfer_buffer_length = bytes;
 	if (period_elapsed)
@@ -1112,12 +1138,26 @@
 	struct snd_pcm_runtime *runtime = subs->pcm_substream->runtime;
 	int stride = runtime->frame_bits >> 3;
 	int processed = urb->transfer_buffer_length / stride;
+	int est_delay;
 
 	spin_lock_irqsave(&subs->lock, flags);
-	if (processed > runtime->delay)
-		runtime->delay = 0;
+	est_delay = snd_usb_pcm_delay(subs, runtime->rate);
+	/* update delay with exact number of samples played */
+	if (processed > subs->last_delay)
+		subs->last_delay = 0;
 	else
-		runtime->delay -= processed;
+		subs->last_delay -= processed;
+	runtime->delay = subs->last_delay;
+
+	/*
+	 * Report when delay estimate is off by more than 2ms.
+	 * The error should be lower than 2ms since the estimate relies
+	 * on two reads of a counter updated every ms.
+	 */
+	if (abs(est_delay - subs->last_delay) * 1000 > runtime->rate * 2)
+		snd_printk(KERN_DEBUG "delay: estimated %d, actual %d\n",
+			est_delay, subs->last_delay);
+
 	spin_unlock_irqrestore(&subs->lock, flags);
 }
 
@@ -1175,7 +1215,7 @@
 
 	switch (cmd) {
 	case SNDRV_PCM_TRIGGER_START:
-		err = start_endpoints(subs);
+		err = start_endpoints(subs, 0);
 		if (err < 0)
 			return err;
 
diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile
index 14131cb..04d959f 100644
--- a/tools/lib/traceevent/Makefile
+++ b/tools/lib/traceevent/Makefile
@@ -129,7 +129,7 @@
 
 # Append required CFLAGS
 override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ)
-override CFLAGS += $(udis86-flags)
+override CFLAGS += $(udis86-flags) -D_GNU_SOURCE
 
 ifeq ($(VERBOSE),1)
   Q =
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index b5b4d80..77ebeb8 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -24,7 +24,6 @@
  *  Frederic Weisbecker gave his permission to relicense the code to
  *  the Lesser General Public License.
  */
-#define _GNU_SOURCE
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -1824,7 +1823,7 @@
 }
 
 static enum event_type
-process_entry(struct event_format *event __unused, struct print_arg *arg,
+process_entry(struct event_format *event __maybe_unused, struct print_arg *arg,
 	      char **tok)
 {
 	enum event_type type;
@@ -2458,7 +2457,8 @@
 
 
 static enum event_type
-process_str(struct event_format *event __unused, struct print_arg *arg, char **tok)
+process_str(struct event_format *event __maybe_unused, struct print_arg *arg,
+	    char **tok)
 {
 	enum event_type type;
 	char *token;
@@ -3653,7 +3653,8 @@
 }
 
 static char *
-get_bprint_format(void *data, int size __unused, struct event_format *event)
+get_bprint_format(void *data, int size __maybe_unused,
+		  struct event_format *event)
 {
 	struct pevent *pevent = event->pevent;
 	unsigned long long addr;
@@ -3889,8 +3890,11 @@
 				goto cont_process;
 			case '*':
 				/* The argument is the length. */
-				if (!arg)
-					die("no argument match");
+				if (!arg) {
+					do_warning("no argument match");
+					event->flags |= EVENT_FL_FAILED;
+					goto out_failed;
+				}
 				len_arg = eval_num_arg(data, size, event, arg);
 				len_as_arg = 1;
 				arg = arg->next;
@@ -3923,15 +3927,21 @@
 			case 'x':
 			case 'X':
 			case 'u':
-				if (!arg)
-					die("no argument match");
+				if (!arg) {
+					do_warning("no argument match");
+					event->flags |= EVENT_FL_FAILED;
+					goto out_failed;
+				}
 
 				len = ((unsigned long)ptr + 1) -
 					(unsigned long)saveptr;
 
 				/* should never happen */
-				if (len > 31)
-					die("bad format!");
+				if (len > 31) {
+					do_warning("bad format!");
+					event->flags |= EVENT_FL_FAILED;
+					len = 31;
+				}
 
 				memcpy(format, saveptr, len);
 				format[len] = 0;
@@ -3995,19 +4005,26 @@
 						trace_seq_printf(s, format, (long long)val);
 					break;
 				default:
-					die("bad count (%d)", ls);
+					do_warning("bad count (%d)", ls);
+					event->flags |= EVENT_FL_FAILED;
 				}
 				break;
 			case 's':
-				if (!arg)
-					die("no matching argument");
+				if (!arg) {
+					do_warning("no matching argument");
+					event->flags |= EVENT_FL_FAILED;
+					goto out_failed;
+				}
 
 				len = ((unsigned long)ptr + 1) -
 					(unsigned long)saveptr;
 
 				/* should never happen */
-				if (len > 31)
-					die("bad format!");
+				if (len > 31) {
+					do_warning("bad format!");
+					event->flags |= EVENT_FL_FAILED;
+					len = 31;
+				}
 
 				memcpy(format, saveptr, len);
 				format[len] = 0;
@@ -4025,6 +4042,11 @@
 			trace_seq_putc(s, *ptr);
 	}
 
+	if (event->flags & EVENT_FL_FAILED) {
+out_failed:
+		trace_seq_printf(s, "[FAILED TO PARSE]");
+	}
+
 	if (args) {
 		free_args(args);
 		free(bprint_fmt);
@@ -4812,8 +4834,8 @@
 		msg = strerror_r(errnum, buf, buflen);
 		if (msg != buf) {
 			size_t len = strlen(msg);
-			char *c = mempcpy(buf, msg, min(buflen-1, len));
-			*c = '\0';
+			memcpy(buf, msg, min(buflen - 1, len));
+			*(buf + min(buflen - 1, len)) = '\0';
 		}
 		return 0;
 	}
@@ -5059,6 +5081,7 @@
 	struct pevent_func_params *param;
 	enum pevent_func_arg_type type;
 	va_list ap;
+	int ret;
 
 	func_handle = find_func_handler(pevent, name);
 	if (func_handle) {
@@ -5071,14 +5094,21 @@
 		remove_func_handler(pevent, name);
 	}
 
-	func_handle = malloc_or_die(sizeof(*func_handle));
+	func_handle = malloc(sizeof(*func_handle));
+	if (!func_handle) {
+		do_warning("Failed to allocate function handler");
+		return PEVENT_ERRNO__MEM_ALLOC_FAILED;
+	}
 	memset(func_handle, 0, sizeof(*func_handle));
 
 	func_handle->ret_type = ret_type;
 	func_handle->name = strdup(name);
 	func_handle->func = func;
-	if (!func_handle->name)
-		die("Failed to allocate function name");
+	if (!func_handle->name) {
+		do_warning("Failed to allocate function name");
+		free(func_handle);
+		return PEVENT_ERRNO__MEM_ALLOC_FAILED;
+	}
 
 	next_param = &(func_handle->params);
 	va_start(ap, name);
@@ -5088,11 +5118,17 @@
 			break;
 
 		if (type < 0 || type >= PEVENT_FUNC_ARG_MAX_TYPES) {
-			warning("Invalid argument type %d", type);
+			do_warning("Invalid argument type %d", type);
+			ret = PEVENT_ERRNO__INVALID_ARG_TYPE;
 			goto out_free;
 		}
 
-		param = malloc_or_die(sizeof(*param));
+		param = malloc(sizeof(*param));
+		if (!param) {
+			do_warning("Failed to allocate function param");
+			ret = PEVENT_ERRNO__MEM_ALLOC_FAILED;
+			goto out_free;
+		}
 		param->type = type;
 		param->next = NULL;
 
@@ -5110,7 +5146,7 @@
  out_free:
 	va_end(ap);
 	free_func_handle(func_handle);
-	return -1;
+	return ret;
 }
 
 /**
@@ -5162,7 +5198,12 @@
 
  not_found:
 	/* Save for later use. */
-	handle = malloc_or_die(sizeof(*handle));
+	handle = malloc(sizeof(*handle));
+	if (!handle) {
+		do_warning("Failed to allocate event handler");
+		return PEVENT_ERRNO__MEM_ALLOC_FAILED;
+	}
+
 	memset(handle, 0, sizeof(*handle));
 	handle->id = id;
 	if (event_name)
@@ -5172,7 +5213,11 @@
 
 	if ((event_name && !handle->event_name) ||
 	    (sys_name && !handle->sys_name)) {
-		die("Failed to allocate event/sys name");
+		do_warning("Failed to allocate event/sys name");
+		free((void *)handle->event_name);
+		free((void *)handle->sys_name);
+		free(handle);
+		return PEVENT_ERRNO__MEM_ALLOC_FAILED;
 	}
 
 	handle->func = func;
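
Note on the event-parse.c hunks above: they all apply one conversion. Library paths that used to call die() or malloc_or_die() now call do_warning() and either flag the event with EVENT_FL_FAILED or hand a PEVENT_ERRNO__* code back to the caller, so the library never exits the process on behalf of the tool. A minimal standalone sketch of that shape follows; struct widget, register_widget() and the stub macros are invented for illustration, and only the do_warning()/PEVENT_ERRNO__* convention mirrors the hunks above.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Stand-ins so the sketch builds on its own; not the library's definitions. */
#define do_warning(fmt, ...)	fprintf(stderr, fmt "\n", ##__VA_ARGS__)
#define PEVENT_ERRNO__MEM_ALLOC_FAILED	(-2)	/* placeholder value */

struct widget {
	char *name;
};

static int register_widget(struct widget **out, const char *name)
{
	struct widget *w;

	w = malloc(sizeof(*w));			/* was: malloc_or_die() */
	if (!w) {
		do_warning("Failed to allocate widget");
		return PEVENT_ERRNO__MEM_ALLOC_FAILED;
	}
	memset(w, 0, sizeof(*w));

	w->name = strdup(name);			/* was: die() on failure */
	if (!w->name) {
		do_warning("Failed to allocate widget name");
		free(w);
		return PEVENT_ERRNO__MEM_ALLOC_FAILED;
	}

	*out = w;				/* caller decides how to react to errors */
	return 0;
}

int main(void)
{
	struct widget *w = NULL;

	if (register_widget(&w, "example"))
		return EXIT_FAILURE;

	free(w->name);
	free(w);
	return EXIT_SUCCESS;
}
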
diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h
index 527df03..a4bbe24 100644
--- a/tools/lib/traceevent/event-parse.h
+++ b/tools/lib/traceevent/event-parse.h
@@ -24,8 +24,8 @@
 #include <stdarg.h>
 #include <regex.h>
 
-#ifndef __unused
-#define __unused __attribute__ ((unused))
+#ifndef __maybe_unused
+#define __maybe_unused __attribute__((unused))
 #endif
 
 /* ----------------------- trace_seq ----------------------- */
@@ -49,7 +49,7 @@
 	int			cpu;
 	int			ref_count;
 	int			locked;		/* Do not free, even if ref_count is zero */
-	void			*private;
+	void			*priv;
 #if DEBUG_RECORD
 	struct pevent_record	*prev;
 	struct pevent_record	*next;
@@ -106,7 +106,7 @@
 	char				*plugin_alias;
 	char				*description;
 	char				*value;
-	void				*private;
+	void				*priv;
 	int				set;
 };
 
@@ -351,7 +351,8 @@
 	_PE(READ_ID_FAILED,	"failed to read event id"),		      \
 	_PE(READ_FORMAT_FAILED,	"failed to read event format"),		      \
 	_PE(READ_PRINT_FAILED,	"failed to read event print fmt"), 	      \
-	_PE(OLD_FTRACE_ARG_FAILED,"failed to allocate field name for ftrace")
+	_PE(OLD_FTRACE_ARG_FAILED,"failed to allocate field name for ftrace"),\
+	_PE(INVALID_ARG_TYPE,	"invalid argument type")
 
 #undef _PE
 #define _PE(__code, __str) PEVENT_ERRNO__ ## __code
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index 26b823b..8f8fbc2 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -21,3 +21,5 @@
 config.mak.autogen
 *-bison.*
 *-flex.*
+*.pyc
+*.pyo
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt
index c89f9e1..c8ffd9f 100644
--- a/tools/perf/Documentation/perf-annotate.txt
+++ b/tools/perf/Documentation/perf-annotate.txt
@@ -85,6 +85,9 @@
 -M::
 --disassembler-style=:: Set disassembler style for objdump.
 
+--objdump=<path>::
+        Path to objdump binary.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt
index 74d7481..ab7f667 100644
--- a/tools/perf/Documentation/perf-diff.txt
+++ b/tools/perf/Documentation/perf-diff.txt
@@ -17,6 +17,9 @@
 
 If no parameters are passed it will assume perf.data.old and perf.data.
 
+The differential profile is displayed only for events matching both
+specified perf.data files.
+
 OPTIONS
 -------
 -M::
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 495210a..f4d91be 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -168,6 +168,9 @@
 	branch stacks and it will automatically switch to the branch view mode,
 	unless --no-branch-stack is used.
 
+--objdump=<path>::
+        Path to objdump binary.
+
 SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-annotate[1]
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index b4b572e..0518079 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -10,6 +10,7 @@
 lib/rbtree.c
 include/linux/swab.h
 arch/*/include/asm/unistd*.h
+arch/*/include/asm/perf_regs.h
 arch/*/lib/memcpy*.S
 arch/*/lib/memset*.S
 include/linux/poison.h
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 722ddee..5077f8e 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -64,12 +64,12 @@
 
 # Additional ARCH settings for x86
 ifeq ($(ARCH),i386)
-	ARCH := x86
+	override ARCH := x86
 	NO_PERF_REGS := 0
 	LIBUNWIND_LIBS = -lunwind -lunwind-x86
 endif
 ifeq ($(ARCH),x86_64)
-	ARCH := x86
+	override ARCH := x86
 	IS_X86_64 := 0
 	ifeq (, $(findstring m32,$(EXTRA_CFLAGS)))
 		IS_X86_64 := $(shell echo __x86_64__ | ${CC} -E -xc - | tail -n 1)
@@ -264,6 +264,7 @@
 LIB_H += util/include/linux/kernel.h
 LIB_H += util/include/linux/list.h
 LIB_H += util/include/linux/export.h
+LIB_H += util/include/linux/magic.h
 LIB_H += util/include/linux/poison.h
 LIB_H += util/include/linux/prefetch.h
 LIB_H += util/include/linux/rbtree.h
@@ -336,6 +337,7 @@
 LIB_H += util/perf_regs.h
 LIB_H += util/unwind.h
 LIB_H += ui/helpline.h
+LIB_H += util/vdso.h
 
 LIB_OBJS += $(OUTPUT)util/abspath.o
 LIB_OBJS += $(OUTPUT)util/alias.o
@@ -403,7 +405,11 @@
 LIB_OBJS += $(OUTPUT)util/target.o
 LIB_OBJS += $(OUTPUT)util/rblist.o
 LIB_OBJS += $(OUTPUT)util/intlist.o
+LIB_OBJS += $(OUTPUT)util/vdso.o
+LIB_OBJS += $(OUTPUT)util/stat.o
+
 LIB_OBJS += $(OUTPUT)ui/helpline.o
+LIB_OBJS += $(OUTPUT)ui/hist.o
 LIB_OBJS += $(OUTPUT)ui/stdio/hist.o
 
 BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o
@@ -755,6 +761,14 @@
 	endif
 endif
 
+ifdef NO_BACKTRACE
+       BASIC_CFLAGS += -DNO_BACKTRACE
+else
+       ifneq ($(call try-cc,$(SOURCE_BACKTRACE),),y)
+               BASIC_CFLAGS += -DNO_BACKTRACE
+       endif
+endif
+
 ifdef ASCIIDOC8
 	export ASCIIDOC8
 endif
@@ -917,6 +931,9 @@
 $(LIBTRACEEVENT):
 	$(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) libtraceevent.a
 
+$(LIBTRACEEVENT)-clean:
+	$(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) clean
+
 help:
 	@echo 'Perf make targets:'
 	@echo '  doc		- make *all* documentation (see below)'
@@ -1056,7 +1073,7 @@
 
 ### Cleaning rules
 
-clean:
+clean: $(LIBTRACEEVENT)-clean
 	$(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf.o $(LANG_BINDINGS)
 	$(RM) $(ALL_PROGRAMS) perf
 	$(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope*
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index a09bece..8f89998 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -3,7 +3,8 @@
 
 extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
 extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
-extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used);
+extern int bench_mem_memcpy(int argc, const char **argv,
+			    const char *prefix __maybe_unused);
 extern int bench_mem_memset(int argc, const char **argv, const char *prefix);
 
 #define BENCH_FORMAT_DEFAULT_STR	"default"
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
index 02dad5d..93c83e3 100644
--- a/tools/perf/bench/mem-memcpy.c
+++ b/tools/perf/bench/mem-memcpy.c
@@ -177,7 +177,7 @@
 	} while (0)
 
 int bench_mem_memcpy(int argc, const char **argv,
-		     const char *prefix __used)
+		     const char *prefix __maybe_unused)
 {
 	int i;
 	size_t len;
diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c
index 350cc95..c6e4bc5 100644
--- a/tools/perf/bench/mem-memset.c
+++ b/tools/perf/bench/mem-memset.c
@@ -171,7 +171,7 @@
 	} while (0)
 
 int bench_mem_memset(int argc, const char **argv,
-		     const char *prefix __used)
+		     const char *prefix __maybe_unused)
 {
 	int i;
 	size_t len;
diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c
index d1d1b30..cc1190a 100644
--- a/tools/perf/bench/sched-messaging.c
+++ b/tools/perf/bench/sched-messaging.c
@@ -267,7 +267,7 @@
 };
 
 int bench_sched_messaging(int argc, const char **argv,
-		    const char *prefix __used)
+		    const char *prefix __maybe_unused)
 {
 	unsigned int i, total_children;
 	struct timeval start, stop, diff;
diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c
index 0c7454f..69cfba8 100644
--- a/tools/perf/bench/sched-pipe.c
+++ b/tools/perf/bench/sched-pipe.c
@@ -43,7 +43,7 @@
 };
 
 int bench_sched_pipe(int argc, const char **argv,
-		     const char *prefix __used)
+		     const char *prefix __maybe_unused)
 {
 	int pipe_1[2], pipe_2[2];
 	int m = 0, i;
@@ -55,14 +55,14 @@
 	 * discarding returned value of read(), write()
 	 * causes error in building environment for perf
 	 */
-	int __used ret, wait_stat;
-	pid_t pid, retpid;
+	int __maybe_unused ret, wait_stat;
+	pid_t pid, retpid __maybe_unused;
 
 	argc = parse_options(argc, argv, options,
 			     bench_sched_pipe_usage, 0);
 
-	assert(!pipe(pipe_1));
-	assert(!pipe(pipe_2));
+	BUG_ON(pipe(pipe_1));
+	BUG_ON(pipe(pipe_2));
 
 	pid = fork();
 	assert(pid >= 0);
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 67522cf..9ea3854 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -239,7 +239,7 @@
 	NULL
 };
 
-int cmd_annotate(int argc, const char **argv, const char *prefix __used)
+int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	struct perf_annotate annotate = {
 		.tool = {
@@ -282,6 +282,8 @@
 		    "Display raw encoding of assembly instructions (default)"),
 	OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
 		   "Specify disassembler style (e.g. -M intel for intel syntax)"),
+	OPT_STRING(0, "objdump", &objdump_path, "path",
+		   "objdump binary to use for disassembly and annotations"),
 	OPT_END()
 	};
 
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index 1f31002..cae9a5f 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -173,7 +173,7 @@
 		all_suite(&subsystems[i]);
 }
 
-int cmd_bench(int argc, const char **argv, const char *prefix __used)
+int cmd_bench(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	int i, j, status = 0;
 
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index 29ad20e..8365455 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -43,15 +43,16 @@
 	}
 
 	build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
-	err = build_id_cache__add_s(sbuild_id, debugdir, filename, false);
+	err = build_id_cache__add_s(sbuild_id, debugdir, filename,
+				    false, false);
 	if (verbose)
 		pr_info("Adding %s %s: %s\n", sbuild_id, filename,
 			err ? "FAIL" : "Ok");
 	return err;
 }
 
-static int build_id_cache__remove_file(const char *filename __used,
-				       const char *debugdir __used)
+static int build_id_cache__remove_file(const char *filename __maybe_unused,
+				       const char *debugdir __maybe_unused)
 {
 	u8 build_id[BUILD_ID_SIZE];
 	char sbuild_id[BUILD_ID_SIZE * 2 + 1];
@@ -119,7 +120,8 @@
 	return 0;
 }
 
-int cmd_buildid_cache(int argc, const char **argv, const char *prefix __used)
+int cmd_buildid_cache(int argc, const char **argv,
+		      const char *prefix __maybe_unused)
 {
 	argc = parse_options(argc, argv, buildid_cache_options,
 			     buildid_cache_usage, 0);
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index 7d68428..1159fee 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -103,7 +103,8 @@
 	return perf_session__list_build_ids();
 }
 
-int cmd_buildid_list(int argc, const char **argv, const char *prefix __used)
+int cmd_buildid_list(int argc, const char **argv,
+		     const char *prefix __maybe_unused)
 {
 	argc = parse_options(argc, argv, options, buildid_list_usage, 0);
 	setup_pager();
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index d29d350..761f419 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -10,6 +10,7 @@
 #include "util/event.h"
 #include "util/hist.h"
 #include "util/evsel.h"
+#include "util/evlist.h"
 #include "util/session.h"
 #include "util/tool.h"
 #include "util/sort.h"
@@ -24,11 +25,6 @@
 static bool  force;
 static bool show_displacement;
 
-struct perf_diff {
-	struct perf_tool tool;
-	struct perf_session *session;
-};
-
 static int hists__add_entry(struct hists *self,
 			    struct addr_location *al, u64 period)
 {
@@ -37,14 +33,12 @@
 	return -ENOMEM;
 }
 
-static int diff__process_sample_event(struct perf_tool *tool,
+static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
 				      union perf_event *event,
 				      struct perf_sample *sample,
-				      struct perf_evsel *evsel __used,
+				      struct perf_evsel *evsel,
 				      struct machine *machine)
 {
-	struct perf_diff *_diff = container_of(tool, struct perf_diff, tool);
-	struct perf_session *session = _diff->session;
 	struct addr_location al;
 
 	if (perf_event__preprocess_sample(event, machine, &al, sample, NULL) < 0) {
@@ -56,26 +50,24 @@
 	if (al.filtered || al.sym == NULL)
 		return 0;
 
-	if (hists__add_entry(&session->hists, &al, sample->period)) {
+	if (hists__add_entry(&evsel->hists, &al, sample->period)) {
 		pr_warning("problem incrementing symbol period, skipping event\n");
 		return -1;
 	}
 
-	session->hists.stats.total_period += sample->period;
+	evsel->hists.stats.total_period += sample->period;
 	return 0;
 }
 
-static struct perf_diff diff = {
-	.tool = {
-		.sample	= diff__process_sample_event,
-		.mmap	= perf_event__process_mmap,
-		.comm	= perf_event__process_comm,
-		.exit	= perf_event__process_task,
-		.fork	= perf_event__process_task,
-		.lost	= perf_event__process_lost,
-		.ordered_samples = true,
-		.ordering_requires_timestamps = true,
-	},
+static struct perf_tool tool = {
+	.sample	= diff__process_sample_event,
+	.mmap	= perf_event__process_mmap,
+	.comm	= perf_event__process_comm,
+	.exit	= perf_event__process_task,
+	.fork	= perf_event__process_task,
+	.lost	= perf_event__process_lost,
+	.ordered_samples = true,
+	.ordering_requires_timestamps = true,
 };
 
 static void perf_session__insert_hist_entry_by_name(struct rb_root *root,
@@ -146,34 +138,71 @@
 	}
 }
 
+static struct perf_evsel *evsel_match(struct perf_evsel *evsel,
+				      struct perf_evlist *evlist)
+{
+	struct perf_evsel *e;
+
+	list_for_each_entry(e, &evlist->entries, node)
+		if (perf_evsel__match2(evsel, e))
+			return e;
+
+	return NULL;
+}
+
 static int __cmd_diff(void)
 {
 	int ret, i;
 #define older (session[0])
 #define newer (session[1])
 	struct perf_session *session[2];
+	struct perf_evlist *evlist_new, *evlist_old;
+	struct perf_evsel *evsel;
+	bool first = true;
 
 	older = perf_session__new(input_old, O_RDONLY, force, false,
-				  &diff.tool);
+				  &tool);
 	newer = perf_session__new(input_new, O_RDONLY, force, false,
-				  &diff.tool);
+				  &tool);
 	if (session[0] == NULL || session[1] == NULL)
 		return -ENOMEM;
 
 	for (i = 0; i < 2; ++i) {
-		diff.session = session[i];
-		ret = perf_session__process_events(session[i], &diff.tool);
+		ret = perf_session__process_events(session[i], &tool);
 		if (ret)
 			goto out_delete;
-		hists__output_resort(&session[i]->hists);
 	}
 
-	if (show_displacement)
-		hists__resort_entries(&older->hists);
+	evlist_old = older->evlist;
+	evlist_new = newer->evlist;
 
-	hists__match(&older->hists, &newer->hists);
-	hists__fprintf(&newer->hists, &older->hists,
-		       show_displacement, true, 0, 0, stdout);
+	list_for_each_entry(evsel, &evlist_new->entries, node)
+		hists__output_resort(&evsel->hists);
+
+	list_for_each_entry(evsel, &evlist_old->entries, node) {
+		hists__output_resort(&evsel->hists);
+
+		if (show_displacement)
+			hists__resort_entries(&evsel->hists);
+	}
+
+	list_for_each_entry(evsel, &evlist_new->entries, node) {
+		struct perf_evsel *evsel_old;
+
+		evsel_old = evsel_match(evsel, evlist_old);
+		if (!evsel_old)
+			continue;
+
+		fprintf(stdout, "%s# Event '%s'\n#\n", first ? "" : "\n",
+			perf_evsel__name(evsel));
+
+		first = false;
+
+		hists__match(&evsel_old->hists, &evsel->hists);
+		hists__fprintf(&evsel->hists, &evsel_old->hists,
+			       show_displacement, true, 0, 0, stdout);
+	}
+
 out_delete:
 	for (i = 0; i < 2; ++i)
 		perf_session__delete(session[i]);
@@ -213,7 +242,7 @@
 	OPT_END()
 };
 
-int cmd_diff(int argc, const char **argv, const char *prefix __used)
+int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	sort_order = diff__default_sort_order;
 	argc = parse_options(argc, argv, options, diff_usage, 0);
@@ -235,6 +264,7 @@
 	if (symbol__init() < 0)
 		return -1;
 
+	perf_hpp__init(true, show_displacement);
 	setup_sorting(diff_usage, options);
 	setup_pager();
 
diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c
index 0dd5a05..1fb1641 100644
--- a/tools/perf/builtin-evlist.c
+++ b/tools/perf/builtin-evlist.c
@@ -113,7 +113,7 @@
 	NULL
 };
 
-int cmd_evlist(int argc, const char **argv, const char *prefix __used)
+int cmd_evlist(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	struct perf_attr_details details = { .verbose = false, };
 	const char *input_name = NULL;
diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c
index 6d5a8a7..25c8b94 100644
--- a/tools/perf/builtin-help.c
+++ b/tools/perf/builtin-help.c
@@ -24,13 +24,14 @@
 } *man_viewer_info_list;
 
 enum help_format {
+	HELP_FORMAT_NONE,
 	HELP_FORMAT_MAN,
 	HELP_FORMAT_INFO,
 	HELP_FORMAT_WEB,
 };
 
 static bool show_all = false;
-static enum help_format help_format = HELP_FORMAT_MAN;
+static enum help_format help_format = HELP_FORMAT_NONE;
 static struct option builtin_help_options[] = {
 	OPT_BOOLEAN('a', "all", &show_all, "print all available commands"),
 	OPT_SET_UINT('m', "man", &help_format, "show man page", HELP_FORMAT_MAN),
@@ -54,7 +55,9 @@
 		return HELP_FORMAT_INFO;
 	if (!strcmp(format, "web") || !strcmp(format, "html"))
 		return HELP_FORMAT_WEB;
-	die("unrecognized help format '%s'", format);
+
+	pr_err("unrecognized help format '%s'", format);
+	return HELP_FORMAT_NONE;
 }
 
 static const char *get_man_viewer_info(const char *name)
@@ -259,6 +262,8 @@
 		if (!value)
 			return config_error_nonbool(var);
 		help_format = parse_help_format(value);
+		if (help_format == HELP_FORMAT_NONE)
+			return -1;
 		return 0;
 	}
 	if (!strcmp(var, "man.viewer")) {
@@ -352,7 +357,7 @@
 		warning("'%s': unknown man viewer.", name);
 }
 
-static void show_man_page(const char *perf_cmd)
+static int show_man_page(const char *perf_cmd)
 {
 	struct man_viewer_list *viewer;
 	const char *page = cmd_to_page(perf_cmd);
@@ -365,28 +370,35 @@
 	if (fallback)
 		exec_viewer(fallback, page);
 	exec_viewer("man", page);
-	die("no man viewer handled the request");
+
+	pr_err("no man viewer handled the request");
+	return -1;
 }
 
-static void show_info_page(const char *perf_cmd)
+static int show_info_page(const char *perf_cmd)
 {
 	const char *page = cmd_to_page(perf_cmd);
 	setenv("INFOPATH", system_path(PERF_INFO_PATH), 1);
 	execlp("info", "info", "perfman", page, NULL);
+	return -1;
 }
 
-static void get_html_page_path(struct strbuf *page_path, const char *page)
+static int get_html_page_path(struct strbuf *page_path, const char *page)
 {
 	struct stat st;
 	const char *html_path = system_path(PERF_HTML_PATH);
 
 	/* Check that we have a perf documentation directory. */
 	if (stat(mkpath("%s/perf.html", html_path), &st)
-	    || !S_ISREG(st.st_mode))
-		die("'%s': not a documentation directory.", html_path);
+	    || !S_ISREG(st.st_mode)) {
+		pr_err("'%s': not a documentation directory.", html_path);
+		return -1;
+	}
 
 	strbuf_init(page_path, 0);
 	strbuf_addf(page_path, "%s/%s.html", html_path, page);
+
+	return 0;
 }
 
 /*
@@ -401,19 +413,23 @@
 }
 #endif
 
-static void show_html_page(const char *perf_cmd)
+static int show_html_page(const char *perf_cmd)
 {
 	const char *page = cmd_to_page(perf_cmd);
 	struct strbuf page_path; /* it leaks but we exec bellow */
 
-	get_html_page_path(&page_path, page);
+	if (get_html_page_path(&page_path, page) != 0)
+		return -1;
 
 	open_html(page_path.buf);
+
+	return 0;
 }
 
-int cmd_help(int argc, const char **argv, const char *prefix __used)
+int cmd_help(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	const char *alias;
+	int rc = 0;
 
 	load_command_list("perf-", &main_cmds, &other_cmds);
 
@@ -444,16 +460,20 @@
 
 	switch (help_format) {
 	case HELP_FORMAT_MAN:
-		show_man_page(argv[0]);
+		rc = show_man_page(argv[0]);
 		break;
 	case HELP_FORMAT_INFO:
-		show_info_page(argv[0]);
+		rc = show_info_page(argv[0]);
 		break;
 	case HELP_FORMAT_WEB:
-		show_html_page(argv[0]);
+		rc = show_html_page(argv[0]);
+		break;
+	case HELP_FORMAT_NONE:
+		/* fall-through */
 	default:
+		rc = -1;
 		break;
 	}
 
-	return 0;
+	return rc;
 }
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 64d8ba2..1eaa661 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -17,9 +17,9 @@
 static char		const *input_name = "-";
 static bool		inject_build_ids;
 
-static int perf_event__repipe_synth(struct perf_tool *tool __used,
+static int perf_event__repipe_synth(struct perf_tool *tool __maybe_unused,
 				    union perf_event *event,
-				    struct machine *machine __used)
+				    struct machine *machine __maybe_unused)
 {
 	uint32_t size;
 	void *buf = event;
@@ -40,7 +40,8 @@
 
 static int perf_event__repipe_op2_synth(struct perf_tool *tool,
 					union perf_event *event,
-					struct perf_session *session __used)
+					struct perf_session *session
+					__maybe_unused)
 {
 	return perf_event__repipe_synth(tool, event, NULL);
 }
@@ -52,13 +53,14 @@
 }
 
 static int perf_event__repipe_tracing_data_synth(union perf_event *event,
-						 struct perf_session *session __used)
+						 struct perf_session *session
+						 __maybe_unused)
 {
 	return perf_event__repipe_synth(NULL, event, NULL);
 }
 
 static int perf_event__repipe_attr(union perf_event *event,
-				   struct perf_evlist **pevlist __used)
+				   struct perf_evlist **pevlist __maybe_unused)
 {
 	int ret;
 	ret = perf_event__process_attr(event, pevlist);
@@ -70,7 +72,7 @@
 
 static int perf_event__repipe(struct perf_tool *tool,
 			      union perf_event *event,
-			      struct perf_sample *sample __used,
+			      struct perf_sample *sample __maybe_unused,
 			      struct machine *machine)
 {
 	return perf_event__repipe_synth(tool, event, machine);
@@ -78,8 +80,8 @@
 
 static int perf_event__repipe_sample(struct perf_tool *tool,
 				     union perf_event *event,
-			      struct perf_sample *sample __used,
-			      struct perf_evsel *evsel __used,
+			      struct perf_sample *sample __maybe_unused,
+			      struct perf_evsel *evsel __maybe_unused,
 			      struct machine *machine)
 {
 	return perf_event__repipe_synth(tool, event, machine);
@@ -163,7 +165,7 @@
 static int perf_event__inject_buildid(struct perf_tool *tool,
 				      union perf_event *event,
 				      struct perf_sample *sample,
-				      struct perf_evsel *evsel __used,
+				      struct perf_evsel *evsel __maybe_unused,
 				      struct machine *machine)
 {
 	struct addr_location al;
@@ -224,7 +226,7 @@
 
 extern volatile int session_done;
 
-static void sig_handler(int sig __attribute__((__unused__)))
+static void sig_handler(int sig __maybe_unused)
 {
 	session_done = 1;
 }
@@ -267,7 +269,7 @@
 	OPT_END()
 };
 
-int cmd_inject(int argc, const char **argv, const char *prefix __used)
+int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	argc = parse_options(argc, argv, options, report_usage, 0);
 
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index fc6607b..f5f8a6b7 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -58,41 +58,52 @@
 
 #define PATH_SYS_NODE	"/sys/devices/system/node"
 
-static void init_cpunode_map(void)
+static int init_cpunode_map(void)
 {
 	FILE *fp;
-	int i;
+	int i, err = -1;
 
 	fp = fopen("/sys/devices/system/cpu/kernel_max", "r");
 	if (!fp) {
 		max_cpu_num = 4096;
-		return;
+		return 0;
 	}
 
-	if (fscanf(fp, "%d", &max_cpu_num) < 1)
-		die("Failed to read 'kernel_max' from sysfs");
+	if (fscanf(fp, "%d", &max_cpu_num) < 1) {
+		pr_err("Failed to read 'kernel_max' from sysfs");
+		goto out_close;
+	}
+
 	max_cpu_num++;
 
 	cpunode_map = calloc(max_cpu_num, sizeof(int));
-	if (!cpunode_map)
-		die("calloc");
+	if (!cpunode_map) {
+		pr_err("%s: calloc failed\n", __func__);
+		goto out_close;
+	}
+
 	for (i = 0; i < max_cpu_num; i++)
 		cpunode_map[i] = -1;
+
+	err = 0;
+out_close:
 	fclose(fp);
+	return err;
 }
 
-static void setup_cpunode_map(void)
+static int setup_cpunode_map(void)
 {
 	struct dirent *dent1, *dent2;
 	DIR *dir1, *dir2;
 	unsigned int cpu, mem;
 	char buf[PATH_MAX];
 
-	init_cpunode_map();
+	if (init_cpunode_map())
+		return -1;
 
 	dir1 = opendir(PATH_SYS_NODE);
 	if (!dir1)
-		return;
+		return -1;
 
 	while ((dent1 = readdir(dir1)) != NULL) {
 		if (dent1->d_type != DT_DIR ||
@@ -112,10 +123,11 @@
 		closedir(dir2);
 	}
 	closedir(dir1);
+	return 0;
 }
 
-static void insert_alloc_stat(unsigned long call_site, unsigned long ptr,
-			      int bytes_req, int bytes_alloc, int cpu)
+static int insert_alloc_stat(unsigned long call_site, unsigned long ptr,
+			     int bytes_req, int bytes_alloc, int cpu)
 {
 	struct rb_node **node = &root_alloc_stat.rb_node;
 	struct rb_node *parent = NULL;
@@ -139,8 +151,10 @@
 		data->bytes_alloc += bytes_alloc;
 	} else {
 		data = malloc(sizeof(*data));
-		if (!data)
-			die("malloc");
+		if (!data) {
+			pr_err("%s: malloc failed\n", __func__);
+			return -1;
+		}
 		data->ptr = ptr;
 		data->pingpong = 0;
 		data->hit = 1;
@@ -152,9 +166,10 @@
 	}
 	data->call_site = call_site;
 	data->alloc_cpu = cpu;
+	return 0;
 }
 
-static void insert_caller_stat(unsigned long call_site,
+static int insert_caller_stat(unsigned long call_site,
 			      int bytes_req, int bytes_alloc)
 {
 	struct rb_node **node = &root_caller_stat.rb_node;
@@ -179,8 +194,10 @@
 		data->bytes_alloc += bytes_alloc;
 	} else {
 		data = malloc(sizeof(*data));
-		if (!data)
-			die("malloc");
+		if (!data) {
+			pr_err("%s: malloc failed\n", __func__);
+			return -1;
+		}
 		data->call_site = call_site;
 		data->pingpong = 0;
 		data->hit = 1;
@@ -190,11 +207,12 @@
 		rb_link_node(&data->node, parent, node);
 		rb_insert_color(&data->node, &root_caller_stat);
 	}
+
+	return 0;
 }
 
-static void perf_evsel__process_alloc_event(struct perf_evsel *evsel,
-					    struct perf_sample *sample,
-					    int node)
+static int perf_evsel__process_alloc_event(struct perf_evsel *evsel,
+					   struct perf_sample *sample, int node)
 {
 	struct event_format *event = evsel->tp_format;
 	void *data = sample->raw_data;
@@ -209,8 +227,9 @@
 	bytes_req = raw_field_value(event, "bytes_req", data);
 	bytes_alloc = raw_field_value(event, "bytes_alloc", data);
 
-	insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, cpu);
-	insert_caller_stat(call_site, bytes_req, bytes_alloc);
+	if (insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, cpu) ||
+	    insert_caller_stat(call_site, bytes_req, bytes_alloc))
+		return -1;
 
 	total_requested += bytes_req;
 	total_allocated += bytes_alloc;
@@ -222,6 +241,7 @@
 			nr_cross_allocs++;
 	}
 	nr_allocs++;
+	return 0;
 }
 
 static int ptr_cmp(struct alloc_stat *, struct alloc_stat *);
@@ -252,8 +272,8 @@
 	return NULL;
 }
 
-static void perf_evsel__process_free_event(struct perf_evsel *evsel,
-					   struct perf_sample *sample)
+static int perf_evsel__process_free_event(struct perf_evsel *evsel,
+					  struct perf_sample *sample)
 {
 	unsigned long ptr = raw_field_value(evsel->tp_format, "ptr",
 					    sample->raw_data);
@@ -261,44 +281,46 @@
 
 	s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp);
 	if (!s_alloc)
-		return;
+		return 0;
 
 	if ((short)sample->cpu != s_alloc->alloc_cpu) {
 		s_alloc->pingpong++;
 
 		s_caller = search_alloc_stat(0, s_alloc->call_site,
 					     &root_caller_stat, callsite_cmp);
-		assert(s_caller);
+		if (!s_caller)
+			return -1;
 		s_caller->pingpong++;
 	}
 	s_alloc->alloc_cpu = -1;
+
+	return 0;
 }
 
-static void perf_evsel__process_kmem_event(struct perf_evsel *evsel,
-					   struct perf_sample *sample)
+static int perf_evsel__process_kmem_event(struct perf_evsel *evsel,
+					  struct perf_sample *sample)
 {
 	struct event_format *event = evsel->tp_format;
 
 	if (!strcmp(event->name, "kmalloc") ||
 	    !strcmp(event->name, "kmem_cache_alloc")) {
-		perf_evsel__process_alloc_event(evsel, sample, 0);
-		return;
+		return perf_evsel__process_alloc_event(evsel, sample, 0);
 	}
 
 	if (!strcmp(event->name, "kmalloc_node") ||
 	    !strcmp(event->name, "kmem_cache_alloc_node")) {
-		perf_evsel__process_alloc_event(evsel, sample, 1);
-		return;
+		return perf_evsel__process_alloc_event(evsel, sample, 1);
 	}
 
 	if (!strcmp(event->name, "kfree") ||
 	    !strcmp(event->name, "kmem_cache_free")) {
-		perf_evsel__process_free_event(evsel, sample);
-		return;
+		return perf_evsel__process_free_event(evsel, sample);
 	}
+
+	return 0;
 }
 
-static int process_sample_event(struct perf_tool *tool __used,
+static int process_sample_event(struct perf_tool *tool __maybe_unused,
 				union perf_event *event,
 				struct perf_sample *sample,
 				struct perf_evsel *evsel,
@@ -314,8 +336,7 @@
 
 	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
 
-	perf_evsel__process_kmem_event(evsel, sample);
-	return 0;
+	return perf_evsel__process_kmem_event(evsel, sample);
 }
 
 static struct perf_tool perf_kmem = {
@@ -613,8 +634,10 @@
 	for (i = 0; i < NUM_AVAIL_SORTS; i++) {
 		if (!strcmp(avail_sorts[i]->name, tok)) {
 			sort = malloc(sizeof(*sort));
-			if (!sort)
-				die("malloc");
+			if (!sort) {
+				pr_err("%s: malloc failed\n", __func__);
+				return -1;
+			}
 			memcpy(sort, avail_sorts[i], sizeof(*sort));
 			list_add_tail(&sort->list, list);
 			return 0;
@@ -629,8 +652,10 @@
 	char *tok;
 	char *str = strdup(arg);
 
-	if (!str)
-		die("strdup");
+	if (!str) {
+		pr_err("%s: strdup failed\n", __func__);
+		return -1;
+	}
 
 	while (true) {
 		tok = strsep(&str, ",");
@@ -647,8 +672,8 @@
 	return 0;
 }
 
-static int parse_sort_opt(const struct option *opt __used,
-			  const char *arg, int unset __used)
+static int parse_sort_opt(const struct option *opt __maybe_unused,
+			  const char *arg, int unset __maybe_unused)
 {
 	if (!arg)
 		return -1;
@@ -661,22 +686,24 @@
 	return 0;
 }
 
-static int parse_caller_opt(const struct option *opt __used,
-			  const char *arg __used, int unset __used)
+static int parse_caller_opt(const struct option *opt __maybe_unused,
+			    const char *arg __maybe_unused,
+			    int unset __maybe_unused)
 {
 	caller_flag = (alloc_flag + 1);
 	return 0;
 }
 
-static int parse_alloc_opt(const struct option *opt __used,
-			  const char *arg __used, int unset __used)
+static int parse_alloc_opt(const struct option *opt __maybe_unused,
+			   const char *arg __maybe_unused,
+			   int unset __maybe_unused)
 {
 	alloc_flag = (caller_flag + 1);
 	return 0;
 }
 
-static int parse_line_opt(const struct option *opt __used,
-			  const char *arg, int unset __used)
+static int parse_line_opt(const struct option *opt __maybe_unused,
+			  const char *arg, int unset __maybe_unused)
 {
 	int lines;
 
@@ -746,7 +773,7 @@
 	return cmd_record(i, rec_argv, NULL);
 }
 
-int cmd_kmem(int argc, const char **argv, const char *prefix __used)
+int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	argc = parse_options(argc, argv, kmem_options, kmem_usage, 0);
 
@@ -758,7 +785,8 @@
 	if (!strncmp(argv[0], "rec", 3)) {
 		return __cmd_record(argc, argv);
 	} else if (!strcmp(argv[0], "stat")) {
-		setup_cpunode_map();
+		if (setup_cpunode_map())
+			return -1;
 
 		if (list_empty(&caller_sort))
 			setup_sorting(&caller_sort, default_sort_order);
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 9fc6e0f..4d2aa2c 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -102,7 +102,7 @@
 	return cmd_buildid_list(i, rec_argv, NULL);
 }
 
-int cmd_kvm(int argc, const char **argv, const char *prefix __used)
+int cmd_kvm(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	perf_host  = 0;
 	perf_guest = 1;
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index bdcff81..1948ece 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -14,7 +14,7 @@
 #include "util/parse-events.h"
 #include "util/cache.h"
 
-int cmd_list(int argc, const char **argv, const char *prefix __used)
+int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	setup_pager();
 
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 585aae2..a803520 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -161,8 +161,10 @@
 		return st;
 
 	st = zalloc(sizeof(struct thread_stat));
-	if (!st)
-		die("memory allocation failed\n");
+	if (!st) {
+		pr_err("memory allocation failed\n");
+		return NULL;
+	}
 
 	st->tid = tid;
 	INIT_LIST_HEAD(&st->seq_list);
@@ -181,8 +183,10 @@
 	struct thread_stat *st;
 
 	st = zalloc(sizeof(struct thread_stat));
-	if (!st)
-		die("memory allocation failed\n");
+	if (!st) {
+		pr_err("memory allocation failed\n");
+		return NULL;
+	}
 	st->tid = tid;
 	INIT_LIST_HEAD(&st->seq_list);
 
@@ -248,18 +252,20 @@
 	{ NULL, NULL }
 };
 
-static void select_key(void)
+static int select_key(void)
 {
 	int i;
 
 	for (i = 0; keys[i].name; i++) {
 		if (!strcmp(keys[i].name, sort_key)) {
 			compare = keys[i].key;
-			return;
+			return 0;
 		}
 	}
 
-	die("Unknown compare key:%s\n", sort_key);
+	pr_err("Unknown compare key: %s\n", sort_key);
+
+	return -1;
 }
 
 static void insert_to_result(struct lock_stat *st,
@@ -324,7 +330,8 @@
 	return new;
 
 alloc_failed:
-	die("memory allocation failed\n");
+	pr_err("memory allocation failed\n");
+	return NULL;
 }
 
 static const char *input_name;
@@ -356,16 +363,16 @@
 };
 
 struct trace_lock_handler {
-	void (*acquire_event)(struct trace_acquire_event *,
+	int (*acquire_event)(struct trace_acquire_event *,
 			      const struct perf_sample *sample);
 
-	void (*acquired_event)(struct trace_acquired_event *,
+	int (*acquired_event)(struct trace_acquired_event *,
 			       const struct perf_sample *sample);
 
-	void (*contended_event)(struct trace_contended_event *,
+	int (*contended_event)(struct trace_contended_event *,
 				const struct perf_sample *sample);
 
-	void (*release_event)(struct trace_release_event *,
+	int (*release_event)(struct trace_release_event *,
 			      const struct perf_sample *sample);
 };
 
@@ -379,8 +386,10 @@
 	}
 
 	seq = zalloc(sizeof(struct lock_seq_stat));
-	if (!seq)
-		die("Not enough memory\n");
+	if (!seq) {
+		pr_err("memory allocation failed\n");
+		return NULL;
+	}
 	seq->state = SEQ_STATE_UNINITIALIZED;
 	seq->addr = addr;
 
@@ -403,7 +412,7 @@
 	READ_LOCK = 2,
 };
 
-static void
+static int
 report_lock_acquire_event(struct trace_acquire_event *acquire_event,
 			  const struct perf_sample *sample)
 {
@@ -412,11 +421,18 @@
 	struct lock_seq_stat *seq;
 
 	ls = lock_stat_findnew(acquire_event->addr, acquire_event->name);
+	if (!ls)
+		return -1;
 	if (ls->discard)
-		return;
+		return 0;
 
 	ts = thread_stat_findnew(sample->tid);
+	if (!ts)
+		return -1;
+
 	seq = get_seq(ts, acquire_event->addr);
+	if (!seq)
+		return -1;
 
 	switch (seq->state) {
 	case SEQ_STATE_UNINITIALIZED:
@@ -461,10 +477,10 @@
 	ls->nr_acquire++;
 	seq->prev_event_time = sample->time;
 end:
-	return;
+	return 0;
 }
 
-static void
+static int
 report_lock_acquired_event(struct trace_acquired_event *acquired_event,
 			   const struct perf_sample *sample)
 {
@@ -475,16 +491,23 @@
 	u64 contended_term;
 
 	ls = lock_stat_findnew(acquired_event->addr, acquired_event->name);
+	if (!ls)
+		return -1;
 	if (ls->discard)
-		return;
+		return 0;
 
 	ts = thread_stat_findnew(sample->tid);
+	if (!ts)
+		return -1;
+
 	seq = get_seq(ts, acquired_event->addr);
+	if (!seq)
+		return -1;
 
 	switch (seq->state) {
 	case SEQ_STATE_UNINITIALIZED:
 		/* orphan event, do nothing */
-		return;
+		return 0;
 	case SEQ_STATE_ACQUIRING:
 		break;
 	case SEQ_STATE_CONTENDED:
@@ -515,10 +538,10 @@
 	ls->nr_acquired++;
 	seq->prev_event_time = timestamp;
 end:
-	return;
+	return 0;
 }
 
-static void
+static int
 report_lock_contended_event(struct trace_contended_event *contended_event,
 			    const struct perf_sample *sample)
 {
@@ -527,16 +550,23 @@
 	struct lock_seq_stat *seq;
 
 	ls = lock_stat_findnew(contended_event->addr, contended_event->name);
+	if (!ls)
+		return -1;
 	if (ls->discard)
-		return;
+		return 0;
 
 	ts = thread_stat_findnew(sample->tid);
+	if (!ts)
+		return -1;
+
 	seq = get_seq(ts, contended_event->addr);
+	if (!seq)
+		return -1;
 
 	switch (seq->state) {
 	case SEQ_STATE_UNINITIALIZED:
 		/* orphan event, do nothing */
-		return;
+		return 0;
 	case SEQ_STATE_ACQUIRING:
 		break;
 	case SEQ_STATE_RELEASED:
@@ -559,10 +589,10 @@
 	ls->nr_contended++;
 	seq->prev_event_time = sample->time;
 end:
-	return;
+	return 0;
 }
 
-static void
+static int
 report_lock_release_event(struct trace_release_event *release_event,
 			  const struct perf_sample *sample)
 {
@@ -571,11 +601,18 @@
 	struct lock_seq_stat *seq;
 
 	ls = lock_stat_findnew(release_event->addr, release_event->name);
+	if (!ls)
+		return -1;
 	if (ls->discard)
-		return;
+		return 0;
 
 	ts = thread_stat_findnew(sample->tid);
+	if (!ts)
+		return -1;
+
 	seq = get_seq(ts, release_event->addr);
+	if (!seq)
+		return -1;
 
 	switch (seq->state) {
 	case SEQ_STATE_UNINITIALIZED:
@@ -609,7 +646,7 @@
 	list_del(&seq->list);
 	free(seq);
 end:
-	return;
+	return 0;
 }
 
 /* lock oriented handlers */
@@ -623,13 +660,14 @@
 
 static struct trace_lock_handler *trace_handler;
 
-static void perf_evsel__process_lock_acquire(struct perf_evsel *evsel,
+static int perf_evsel__process_lock_acquire(struct perf_evsel *evsel,
 					     struct perf_sample *sample)
 {
 	struct trace_acquire_event acquire_event;
 	struct event_format *event = evsel->tp_format;
 	void *data = sample->raw_data;
 	u64 tmp;		/* this is required for casting... */
+	int rc = 0;
 
 	tmp = raw_field_value(event, "lockdep_addr", data);
 	memcpy(&acquire_event.addr, &tmp, sizeof(void *));
@@ -637,70 +675,84 @@
 	acquire_event.flag = (int)raw_field_value(event, "flag", data);
 
 	if (trace_handler->acquire_event)
-		trace_handler->acquire_event(&acquire_event, sample);
+		rc = trace_handler->acquire_event(&acquire_event, sample);
+
+	return rc;
 }
 
-static void perf_evsel__process_lock_acquired(struct perf_evsel *evsel,
+static int perf_evsel__process_lock_acquired(struct perf_evsel *evsel,
 					      struct perf_sample *sample)
 {
 	struct trace_acquired_event acquired_event;
 	struct event_format *event = evsel->tp_format;
 	void *data = sample->raw_data;
 	u64 tmp;		/* this is required for casting... */
+	int rc = 0;
 
 	tmp = raw_field_value(event, "lockdep_addr", data);
 	memcpy(&acquired_event.addr, &tmp, sizeof(void *));
 	acquired_event.name = (char *)raw_field_ptr(event, "name", data);
 
-	if (trace_handler->acquire_event)
-		trace_handler->acquired_event(&acquired_event, sample);
+	if (trace_handler->acquired_event)
+		rc = trace_handler->acquired_event(&acquired_event, sample);
+
+	return rc;
 }
 
-static void perf_evsel__process_lock_contended(struct perf_evsel *evsel,
+static int perf_evsel__process_lock_contended(struct perf_evsel *evsel,
 					       struct perf_sample *sample)
 {
 	struct trace_contended_event contended_event;
 	struct event_format *event = evsel->tp_format;
 	void *data = sample->raw_data;
 	u64 tmp;		/* this is required for casting... */
+	int rc = 0;
 
 	tmp = raw_field_value(event, "lockdep_addr", data);
 	memcpy(&contended_event.addr, &tmp, sizeof(void *));
 	contended_event.name = (char *)raw_field_ptr(event, "name", data);
 
-	if (trace_handler->acquire_event)
-		trace_handler->contended_event(&contended_event, sample);
+	if (trace_handler->contended_event)
+		rc = trace_handler->contended_event(&contended_event, sample);
+
+	return rc;
 }
 
-static void perf_evsel__process_lock_release(struct perf_evsel *evsel,
+static int perf_evsel__process_lock_release(struct perf_evsel *evsel,
 					     struct perf_sample *sample)
 {
 	struct trace_release_event release_event;
 	struct event_format *event = evsel->tp_format;
 	void *data = sample->raw_data;
 	u64 tmp;		/* this is required for casting... */
+	int rc = 0;
 
 	tmp = raw_field_value(event, "lockdep_addr", data);
 	memcpy(&release_event.addr, &tmp, sizeof(void *));
 	release_event.name = (char *)raw_field_ptr(event, "name", data);
 
-	if (trace_handler->acquire_event)
-		trace_handler->release_event(&release_event, sample);
+	if (trace_handler->release_event)
+		rc = trace_handler->release_event(&release_event, sample);
+
+	return rc;
 }
 
-static void perf_evsel__process_lock_event(struct perf_evsel *evsel,
+static int perf_evsel__process_lock_event(struct perf_evsel *evsel,
 					   struct perf_sample *sample)
 {
 	struct event_format *event = evsel->tp_format;
+	int rc = 0;
 
 	if (!strcmp(event->name, "lock_acquire"))
-		perf_evsel__process_lock_acquire(evsel, sample);
+		rc = perf_evsel__process_lock_acquire(evsel, sample);
 	if (!strcmp(event->name, "lock_acquired"))
-		perf_evsel__process_lock_acquired(evsel, sample);
+		rc = perf_evsel__process_lock_acquired(evsel, sample);
 	if (!strcmp(event->name, "lock_contended"))
-		perf_evsel__process_lock_contended(evsel, sample);
+		rc = perf_evsel__process_lock_contended(evsel, sample);
 	if (!strcmp(event->name, "lock_release"))
-		perf_evsel__process_lock_release(evsel, sample);
+		rc = perf_evsel__process_lock_release(evsel, sample);
+
+	return rc;
 }
 
 static void print_bad_events(int bad, int total)
@@ -802,17 +854,23 @@
 	}
 }
 
-static void dump_info(void)
+static int dump_info(void)
 {
+	int rc = 0;
+
 	if (info_threads)
 		dump_threads();
 	else if (info_map)
 		dump_map();
-	else
-		die("Unknown type of information\n");
+	else {
+		rc = -1;
+		pr_err("Unknown type of information\n");
+	}
+
+	return rc;
 }
 
-static int process_sample_event(struct perf_tool *tool __used,
+static int process_sample_event(struct perf_tool *tool __maybe_unused,
 				union perf_event *event,
 				struct perf_sample *sample,
 				struct perf_evsel *evsel,
@@ -826,8 +884,7 @@
 		return -1;
 	}
 
-	perf_evsel__process_lock_event(evsel, sample);
-	return 0;
+	return perf_evsel__process_lock_event(evsel, sample);
 }
 
 static struct perf_tool eops = {
@@ -839,8 +896,10 @@
 static int read_events(void)
 {
 	session = perf_session__new(input_name, O_RDONLY, 0, false, &eops);
-	if (!session)
-		die("Initializing perf session failed\n");
+	if (!session) {
+		pr_err("Initializing perf session failed\n");
+		return -1;
+	}
 
 	return perf_session__process_events(session, &eops);
 }
@@ -857,13 +916,18 @@
 	}
 }
 
-static void __cmd_report(void)
+static int __cmd_report(void)
 {
 	setup_pager();
-	select_key();
-	read_events();
+
+	if ((select_key() != 0) ||
+	    (read_events() != 0))
+		return -1;
+
 	sort_result();
 	print_result();
+
+	return 0;
 }
 
 static const char * const report_usage[] = {
@@ -956,9 +1020,10 @@
 	return cmd_record(i, rec_argv, NULL);
 }
 
-int cmd_lock(int argc, const char **argv, const char *prefix __used)
+int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	unsigned int i;
+	int rc = 0;
 
 	symbol__init();
 	for (i = 0; i < LOCKHASH_SIZE; i++)
@@ -993,11 +1058,13 @@
 		/* recycling report_lock_ops */
 		trace_handler = &report_lock_ops;
 		setup_pager();
-		read_events();
-		dump_info();
+		if (read_events() != 0)
+			rc = -1;
+		else
+			rc = dump_info();
 	} else {
 		usage_with_options(lock_usage, lock_options);
 	}
 
-	return 0;
+	return rc;
 }
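
Note on builtin-lock.c: like builtin-kmem.c before it, it converts its void event handlers to int so that allocation failures and orphan sequences propagate through perf_evsel__process_lock_event() up to process_sample_event() instead of die()-ing inside the tool. A toy sketch of that propagation, not taken from the patch; struct sample, handle_acquire() and dispatch_event() are invented, and only the return-code convention matches the hunks above.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Invented stand-in for the evsel/sample plumbing. */
struct sample {
	const char	*name;
	unsigned long	addr;
};

static int handle_acquire(const struct sample *s)
{
	void *seq = calloc(1, 32);		/* was: zalloc() + die() on NULL */

	if (!seq) {
		fprintf(stderr, "memory allocation failed\n");
		return -1;
	}
	printf("acquire %s@%#lx\n", s->name, s->addr);
	free(seq);
	return 0;
}

static int dispatch_event(const struct sample *s)
{
	int rc = 0;

	if (!strcmp(s->name, "lock_acquire"))	/* caller sees rc, not an exit() */
		rc = handle_acquire(s);

	return rc;
}

int main(void)
{
	struct sample s = { "lock_acquire", 0xffffffff81000000ul };

	return dispatch_event(&s) ? EXIT_FAILURE : EXIT_SUCCESS;
}
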
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index e215ae6..118aa89 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -143,8 +143,8 @@
 	return ret;
 }
 
-static int opt_add_probe_event(const struct option *opt __used,
-			      const char *str, int unset __used)
+static int opt_add_probe_event(const struct option *opt __maybe_unused,
+			      const char *str, int unset __maybe_unused)
 {
 	if (str) {
 		params.mod_events = true;
@@ -153,8 +153,8 @@
 		return 0;
 }
 
-static int opt_del_probe_event(const struct option *opt __used,
-			       const char *str, int unset __used)
+static int opt_del_probe_event(const struct option *opt __maybe_unused,
+			       const char *str, int unset __maybe_unused)
 {
 	if (str) {
 		params.mod_events = true;
@@ -166,7 +166,7 @@
 }
 
 static int opt_set_target(const struct option *opt, const char *str,
-			int unset __used)
+			int unset __maybe_unused)
 {
 	int ret = -ENOENT;
 
@@ -188,8 +188,8 @@
 }
 
 #ifdef DWARF_SUPPORT
-static int opt_show_lines(const struct option *opt __used,
-			  const char *str, int unset __used)
+static int opt_show_lines(const struct option *opt __maybe_unused,
+			  const char *str, int unset __maybe_unused)
 {
 	int ret = 0;
 
@@ -209,8 +209,8 @@
 	return ret;
 }
 
-static int opt_show_vars(const struct option *opt __used,
-			 const char *str, int unset __used)
+static int opt_show_vars(const struct option *opt __maybe_unused,
+			 const char *str, int unset __maybe_unused)
 {
 	struct perf_probe_event *pev = &params.events[params.nevents];
 	int ret;
@@ -229,8 +229,8 @@
 }
 #endif
 
-static int opt_set_filter(const struct option *opt __used,
-			  const char *str, int unset __used)
+static int opt_set_filter(const struct option *opt __maybe_unused,
+			  const char *str, int unset __maybe_unused)
 {
 	const char *err;
 
@@ -327,7 +327,7 @@
 	OPT_END()
 };
 
-int cmd_probe(int argc, const char **argv, const char *prefix __used)
+int cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	int ret;
 
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 479ff2a..c643ed6 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -71,32 +71,38 @@
 	rec->bytes_written += size;
 }
 
-static void write_output(struct perf_record *rec, void *buf, size_t size)
+static int write_output(struct perf_record *rec, void *buf, size_t size)
 {
 	while (size) {
 		int ret = write(rec->output, buf, size);
 
-		if (ret < 0)
-			die("failed to write");
+		if (ret < 0) {
+			pr_err("failed to write\n");
+			return -1;
+		}
 
 		size -= ret;
 		buf += ret;
 
 		rec->bytes_written += ret;
 	}
+
+	return 0;
 }
 
 static int process_synthesized_event(struct perf_tool *tool,
 				     union perf_event *event,
-				     struct perf_sample *sample __used,
-				     struct machine *machine __used)
+				     struct perf_sample *sample __maybe_unused,
+				     struct machine *machine __maybe_unused)
 {
 	struct perf_record *rec = container_of(tool, struct perf_record, tool);
-	write_output(rec, event, event->header.size);
+	if (write_output(rec, event, event->header.size) < 0)
+		return -1;
+
 	return 0;
 }
 
-static void perf_record__mmap_read(struct perf_record *rec,
+static int perf_record__mmap_read(struct perf_record *rec,
 				   struct perf_mmap *md)
 {
 	unsigned int head = perf_mmap__read_head(md);
@@ -104,9 +110,10 @@
 	unsigned char *data = md->base + rec->page_size;
 	unsigned long size;
 	void *buf;
+	int rc = 0;
 
 	if (old == head)
-		return;
+		return 0;
 
 	rec->samples++;
 
@@ -117,17 +124,26 @@
 		size = md->mask + 1 - (old & md->mask);
 		old += size;
 
-		write_output(rec, buf, size);
+		if (write_output(rec, buf, size) < 0) {
+			rc = -1;
+			goto out;
+		}
 	}
 
 	buf = &data[old & md->mask];
 	size = head - old;
 	old += size;
 
-	write_output(rec, buf, size);
+	if (write_output(rec, buf, size) < 0) {
+		rc = -1;
+		goto out;
+	}
 
 	md->prev = old;
 	perf_mmap__write_tail(md, old);
+
+out:
+	return rc;
 }
 
 static volatile int done = 0;
@@ -143,7 +159,7 @@
 	signr = sig;
 }
 
-static void perf_record__sig_exit(int exit_status __used, void *arg)
+static void perf_record__sig_exit(int exit_status __maybe_unused, void *arg)
 {
 	struct perf_record *rec = arg;
 	int status;
@@ -183,12 +199,13 @@
 	return true;
 }
 
-static void perf_record__open(struct perf_record *rec)
+static int perf_record__open(struct perf_record *rec)
 {
 	struct perf_evsel *pos;
 	struct perf_evlist *evlist = rec->evlist;
 	struct perf_session *session = rec->session;
 	struct perf_record_opts *opts = &rec->opts;
+	int rc = 0;
 
 	perf_evlist__config_attrs(evlist, opts);
 
@@ -222,10 +239,13 @@
 
 			if (err == EPERM || err == EACCES) {
 				ui__error_paranoid();
-				exit(EXIT_FAILURE);
+				rc = -err;
+				goto out;
 			} else if (err ==  ENODEV && opts->target.cpu_list) {
-				die("No such device - did you specify"
-					" an out-of-range profile CPU?\n");
+				pr_err("No such device - did you specify"
+				       " an out-of-range profile CPU?\n");
+				rc = -err;
+				goto out;
 			} else if (err == EINVAL) {
 				if (!opts->exclude_guest_missing &&
 				    (attr->exclude_guest || attr->exclude_host)) {
@@ -272,7 +292,8 @@
 			if (err == ENOENT) {
 				ui__error("The %s event is not supported.\n",
 					  perf_evsel__name(pos));
-				exit(EXIT_FAILURE);
+				rc = -err;
+				goto out;
 			}
 
 			printf("\n");
@@ -280,34 +301,46 @@
 			      err, strerror(err));
 
 #if defined(__i386__) || defined(__x86_64__)
-			if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
-				die("No hardware sampling interrupt available."
-				    " No APIC? If so then you can boot the kernel"
-				    " with the \"lapic\" boot parameter to"
-				    " force-enable it.\n");
+			if (attr->type == PERF_TYPE_HARDWARE &&
+			    err == EOPNOTSUPP) {
+				pr_err("No hardware sampling interrupt available."
+				       " No APIC? If so then you can boot the kernel"
+				       " with the \"lapic\" boot parameter to"
+				       " force-enable it.\n");
+				rc = -err;
+				goto out;
+			}
 #endif
 
-			die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
+			pr_err("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
+			rc = -err;
+			goto out;
 		}
 	}
 
 	if (perf_evlist__set_filters(evlist)) {
 		error("failed to set filter with %d (%s)\n", errno,
 			strerror(errno));
-		exit(-1);
+		rc = -1;
+		goto out;
 	}
 
 	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
-		if (errno == EPERM)
-			die("Permission error mapping pages.\n"
-			    "Consider increasing "
-			    "/proc/sys/kernel/perf_event_mlock_kb,\n"
-			    "or try again with a smaller value of -m/--mmap_pages.\n"
-			    "(current value: %d)\n", opts->mmap_pages);
-		else if (!is_power_of_2(opts->mmap_pages))
-			die("--mmap_pages/-m value must be a power of two.");
-
-		die("failed to mmap with %d (%s)\n", errno, strerror(errno));
+		if (errno == EPERM) {
+			pr_err("Permission error mapping pages.\n"
+			       "Consider increasing "
+			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
+			       "or try again with a smaller value of -m/--mmap_pages.\n"
+			       "(current value: %d)\n", opts->mmap_pages);
+			rc = -errno;
+		} else if (!is_power_of_2(opts->mmap_pages)) {
+			pr_err("--mmap_pages/-m value must be a power of two.");
+			rc = -EINVAL;
+		} else {
+			pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
+			rc = -errno;
+		}
+		goto out;
 	}
 
 	if (rec->file_new)
@@ -315,11 +348,14 @@
 	else {
 		if (!perf_evlist__equal(session->evlist, evlist)) {
 			fprintf(stderr, "incompatible append\n");
-			exit(-1);
+			rc = -1;
+			goto out;
 		}
  	}
 
 	perf_session__set_id_hdr_size(session);
+out:
+	return rc;
 }
 
 static int process_buildids(struct perf_record *rec)
@@ -335,10 +371,13 @@
 					      size, &build_id__mark_dso_hit_ops);
 }
 
-static void perf_record__exit(int status __used, void *arg)
+static void perf_record__exit(int status, void *arg)
 {
 	struct perf_record *rec = arg;
 
+	if (status != 0)
+		return;
+
 	if (!rec->opts.pipe_output) {
 		rec->session->header.data_size += rec->bytes_written;
 
@@ -393,17 +432,26 @@
 	.type = PERF_RECORD_FINISHED_ROUND,
 };
 
-static void perf_record__mmap_read_all(struct perf_record *rec)
+static int perf_record__mmap_read_all(struct perf_record *rec)
 {
 	int i;
+	int rc = 0;
 
 	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
-		if (rec->evlist->mmap[i].base)
-			perf_record__mmap_read(rec, &rec->evlist->mmap[i]);
+		if (rec->evlist->mmap[i].base) {
+			if (perf_record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
+				rc = -1;
+				goto out;
+			}
+		}
 	}
 
 	if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
-		write_output(rec, &finished_round_event, sizeof(finished_round_event));
+		rc = write_output(rec, &finished_round_event,
+				  sizeof(finished_round_event));
+
+out:
+	return rc;
 }
 
 static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
@@ -463,7 +511,7 @@
 		output = open(output_name, flags, S_IRUSR | S_IWUSR);
 	if (output < 0) {
 		perror("failed to create output file");
-		exit(-1);
+		return -1;
 	}
 
 	rec->output = output;
@@ -503,7 +551,10 @@
 		}
 	}
 
-	perf_record__open(rec);
+	if (perf_record__open(rec) != 0) {
+		err = -1;
+		goto out_delete_session;
+	}
 
 	/*
 	 * perf_session__delete(session) will be called at perf_record__exit()
@@ -513,19 +564,20 @@
 	if (opts->pipe_output) {
 		err = perf_header__write_pipe(output);
 		if (err < 0)
-			return err;
+			goto out_delete_session;
 	} else if (rec->file_new) {
 		err = perf_session__write_header(session, evsel_list,
 						 output, false);
 		if (err < 0)
-			return err;
+			goto out_delete_session;
 	}
 
 	if (!rec->no_buildid
 	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
 		pr_err("Couldn't generate buildids. "
 		       "Use --no-buildid to profile anyway.\n");
-		return -1;
+		err = -1;
+		goto out_delete_session;
 	}
 
 	rec->post_processing_offset = lseek(output, 0, SEEK_CUR);
@@ -533,7 +585,8 @@
 	machine = perf_session__find_host_machine(session);
 	if (!machine) {
 		pr_err("Couldn't find native kernel information.\n");
-		return -1;
+		err = -1;
+		goto out_delete_session;
 	}
 
 	if (opts->pipe_output) {
@@ -541,14 +594,14 @@
 						   process_synthesized_event);
 		if (err < 0) {
 			pr_err("Couldn't synthesize attrs.\n");
-			return err;
+			goto out_delete_session;
 		}
 
 		err = perf_event__synthesize_event_types(tool, process_synthesized_event,
 							 machine);
 		if (err < 0) {
 			pr_err("Couldn't synthesize event_types.\n");
-			return err;
+			goto out_delete_session;
 		}
 
 		if (have_tracepoints(&evsel_list->entries)) {
@@ -564,7 +617,7 @@
 								  process_synthesized_event);
 			if (err <= 0) {
 				pr_err("Couldn't record tracing data.\n");
-				return err;
+				goto out_delete_session;
 			}
 			advance_output(rec, err);
 		}
@@ -592,20 +645,24 @@
 					       perf_event__synthesize_guest_os);
 
 	if (!opts->target.system_wide)
-		perf_event__synthesize_thread_map(tool, evsel_list->threads,
+		err = perf_event__synthesize_thread_map(tool, evsel_list->threads,
 						  process_synthesized_event,
 						  machine);
 	else
-		perf_event__synthesize_threads(tool, process_synthesized_event,
+		err = perf_event__synthesize_threads(tool, process_synthesized_event,
 					       machine);
 
+	if (err != 0)
+		goto out_delete_session;
+
 	if (rec->realtime_prio) {
 		struct sched_param param;
 
 		param.sched_priority = rec->realtime_prio;
 		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
 			pr_err("Could not set realtime priority.\n");
-			exit(-1);
+			err = -1;
+			goto out_delete_session;
 		}
 	}
 
@@ -620,7 +677,10 @@
 	for (;;) {
 		int hits = rec->samples;
 
-		perf_record__mmap_read_all(rec);
+		if (perf_record__mmap_read_all(rec) < 0) {
+			err = -1;
+			goto out_delete_session;
+		}
 
 		if (hits == rec->samples) {
 			if (done)
@@ -767,7 +827,7 @@
 #endif /* !NO_LIBUNWIND_SUPPORT */
 
 static int
-parse_callchain_opt(const struct option *opt __used, const char *arg,
+parse_callchain_opt(const struct option *opt __maybe_unused, const char *arg,
 		    int unset)
 {
 	struct perf_record *rec = (struct perf_record *)opt->value;
@@ -943,7 +1003,7 @@
 	OPT_END()
 };
 
-int cmd_record(int argc, const char **argv, const char *prefix __used)
+int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	int err = -ENOMEM;
 	struct perf_evsel *pos;
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index d618253..1da243d 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -93,7 +93,7 @@
 			struct annotation *notes;
 			err = -ENOMEM;
 			bx = he->branch_info;
-			if (bx->from.sym && use_browser > 0) {
+			if (bx->from.sym && use_browser == 1 && sort__has_sym) {
 				notes = symbol__annotation(bx->from.sym);
 				if (!notes->src
 				    && symbol__alloc_hist(bx->from.sym) < 0)
@@ -107,7 +107,7 @@
 					goto out;
 			}
 
-			if (bx->to.sym && use_browser > 0) {
+			if (bx->to.sym && use_browser == 1 && sort__has_sym) {
 				notes = symbol__annotation(bx->to.sym);
 				if (!notes->src
 				    && symbol__alloc_hist(bx->to.sym) < 0)
@@ -162,7 +162,7 @@
 	 * so we don't allocated the extra space needed because the stdio
 	 * code will not use it.
 	 */
-	if (he->ms.sym != NULL && use_browser > 0) {
+	if (he->ms.sym != NULL && use_browser == 1 && sort__has_sym) {
 		struct annotation *notes = symbol__annotation(he->ms.sym);
 
 		assert(evsel != NULL);
@@ -223,9 +223,9 @@
 
 static int process_read_event(struct perf_tool *tool,
 			      union perf_event *event,
-			      struct perf_sample *sample __used,
+			      struct perf_sample *sample __maybe_unused,
 			      struct perf_evsel *evsel,
-			      struct machine *machine __used)
+			      struct machine *machine __maybe_unused)
 {
 	struct perf_report *rep = container_of(tool, struct perf_report, tool);
 
@@ -287,7 +287,7 @@
 
 extern volatile int session_done;
 
-static void sig_handler(int sig __used)
+static void sig_handler(int sig __maybe_unused)
 {
 	session_done = 1;
 }
@@ -533,13 +533,14 @@
 }
 
 static int
-parse_branch_mode(const struct option *opt __used, const char *str __used, int unset)
+parse_branch_mode(const struct option *opt __maybe_unused,
+		  const char *str __maybe_unused, int unset)
 {
 	sort__branch_mode = !unset;
 	return 0;
 }
 
-int cmd_report(int argc, const char **argv, const char *prefix __used)
+int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	struct perf_session *session;
 	struct stat st;
@@ -638,6 +639,8 @@
 		    "Show a column with the sum of periods"),
 	OPT_CALLBACK_NOOPT('b', "branch-stack", &sort__branch_mode, "",
 		    "use branch records for histogram filling", parse_branch_mode),
+	OPT_STRING(0, "objdump", &objdump_path, "path",
+		   "objdump binary to use for disassembly and annotations"),
 	OPT_END()
 	};
 
@@ -686,15 +689,19 @@
 
 	if (strcmp(report.input_name, "-") != 0)
 		setup_browser(true);
-	else
+	else {
 		use_browser = 0;
+		perf_hpp__init(false, false);
+	}
+
+	setup_sorting(report_usage, options);
 
 	/*
 	 * Only in the newt browser we are doing integrated annotation,
 	 * so don't allocate extra space that won't be used in the stdio
 	 * implementation.
 	 */
-	if (use_browser > 0) {
+	if (use_browser == 1 && sort__has_sym) {
 		symbol_conf.priv_size = sizeof(struct annotation);
 		report.annotate_init  = symbol__annotate_init;
 		/*
@@ -717,8 +724,6 @@
 	if (symbol__init() < 0)
 		goto error;
 
-	setup_sorting(report_usage, options);
-
 	if (parent_pattern != default_parent_pattern) {
 		if (sort_dimension__add("parent") < 0)
 			goto error;
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index a25a023..9b9e32e 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -23,26 +23,12 @@
 #include <pthread.h>
 #include <math.h>
 
-static const char		*input_name;
-
-static char			default_sort_order[] = "avg, max, switch, runtime";
-static const char		*sort_order = default_sort_order;
-
-static int			profile_cpu = -1;
-
 #define PR_SET_NAME		15               /* Set process name */
 #define MAX_CPUS		4096
-
-static u64			run_measurement_overhead;
-static u64			sleep_measurement_overhead;
-
 #define COMM_LEN		20
 #define SYM_LEN			129
-
 #define MAX_PID			65536
 
-static unsigned long		nr_tasks;
-
 struct sched_atom;
 
 struct task_desc {
@@ -80,44 +66,6 @@
 	struct task_desc	*wakee;
 };
 
-static struct task_desc		*pid_to_task[MAX_PID];
-
-static struct task_desc		**tasks;
-
-static pthread_mutex_t		start_work_mutex = PTHREAD_MUTEX_INITIALIZER;
-static u64			start_time;
-
-static pthread_mutex_t		work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-static unsigned long		nr_run_events;
-static unsigned long		nr_sleep_events;
-static unsigned long		nr_wakeup_events;
-
-static unsigned long		nr_sleep_corrections;
-static unsigned long		nr_run_events_optimized;
-
-static unsigned long		targetless_wakeups;
-static unsigned long		multitarget_wakeups;
-
-static u64			cpu_usage;
-static u64			runavg_cpu_usage;
-static u64			parent_cpu_usage;
-static u64			runavg_parent_cpu_usage;
-
-static unsigned long		nr_runs;
-static u64			sum_runtime;
-static u64			sum_fluct;
-static u64			run_avg;
-
-static unsigned int		replay_repeat = 10;
-static unsigned long		nr_timestamps;
-static unsigned long		nr_unordered_timestamps;
-static unsigned long		nr_state_machine_bugs;
-static unsigned long		nr_context_switch_bugs;
-static unsigned long		nr_events;
-static unsigned long		nr_lost_chunks;
-static unsigned long		nr_lost_events;
-
 #define TASK_STATE_TO_CHAR_STR "RSDTtZX"
 
 enum thread_state {
@@ -149,11 +97,79 @@
 
 typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *);
 
-static struct rb_root		atom_root, sorted_atom_root;
+struct perf_sched;
 
-static u64			all_runtime;
-static u64			all_count;
+struct trace_sched_handler {
+	int (*switch_event)(struct perf_sched *sched, struct perf_evsel *evsel,
+			    struct perf_sample *sample, struct machine *machine);
 
+	int (*runtime_event)(struct perf_sched *sched, struct perf_evsel *evsel,
+			     struct perf_sample *sample, struct machine *machine);
+
+	int (*wakeup_event)(struct perf_sched *sched, struct perf_evsel *evsel,
+			    struct perf_sample *sample, struct machine *machine);
+
+	int (*fork_event)(struct perf_sched *sched, struct perf_evsel *evsel,
+			  struct perf_sample *sample);
+
+	int (*migrate_task_event)(struct perf_sched *sched,
+				  struct perf_evsel *evsel,
+				  struct perf_sample *sample,
+				  struct machine *machine);
+};
+
+struct perf_sched {
+	struct perf_tool tool;
+	const char	 *input_name;
+	const char	 *sort_order;
+	unsigned long	 nr_tasks;
+	struct task_desc *pid_to_task[MAX_PID];
+	struct task_desc **tasks;
+	const struct trace_sched_handler *tp_handler;
+	pthread_mutex_t	 start_work_mutex;
+	pthread_mutex_t	 work_done_wait_mutex;
+	int		 profile_cpu;
+/*
+ * Track the current task - that way we can know whether there's any
+ * weird events, such as a task being switched away that is not current.
+ */
+	int		 max_cpu;
+	u32		 curr_pid[MAX_CPUS];
+	struct thread	 *curr_thread[MAX_CPUS];
+	char		 next_shortname1;
+	char		 next_shortname2;
+	unsigned int	 replay_repeat;
+	unsigned long	 nr_run_events;
+	unsigned long	 nr_sleep_events;
+	unsigned long	 nr_wakeup_events;
+	unsigned long	 nr_sleep_corrections;
+	unsigned long	 nr_run_events_optimized;
+	unsigned long	 targetless_wakeups;
+	unsigned long	 multitarget_wakeups;
+	unsigned long	 nr_runs;
+	unsigned long	 nr_timestamps;
+	unsigned long	 nr_unordered_timestamps;
+	unsigned long	 nr_state_machine_bugs;
+	unsigned long	 nr_context_switch_bugs;
+	unsigned long	 nr_events;
+	unsigned long	 nr_lost_chunks;
+	unsigned long	 nr_lost_events;
+	u64		 run_measurement_overhead;
+	u64		 sleep_measurement_overhead;
+	u64		 start_time;
+	u64		 cpu_usage;
+	u64		 runavg_cpu_usage;
+	u64		 parent_cpu_usage;
+	u64		 runavg_parent_cpu_usage;
+	u64		 sum_runtime;
+	u64		 sum_fluct;
+	u64		 run_avg;
+	u64		 all_runtime;
+	u64		 all_count;
+	u64		 cpu_last_switched[MAX_CPUS];
+	struct rb_root	 atom_root, sorted_atom_root;
+	struct list_head sort_list, cmp_pid;
+};
 
 static u64 get_nsecs(void)
 {
@@ -164,13 +180,13 @@
 	return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
 }
 
-static void burn_nsecs(u64 nsecs)
+static void burn_nsecs(struct perf_sched *sched, u64 nsecs)
 {
 	u64 T0 = get_nsecs(), T1;
 
 	do {
 		T1 = get_nsecs();
-	} while (T1 + run_measurement_overhead < T0 + nsecs);
+	} while (T1 + sched->run_measurement_overhead < T0 + nsecs);
 }
 
 static void sleep_nsecs(u64 nsecs)
@@ -183,24 +199,24 @@
 	nanosleep(&ts, NULL);
 }
 
-static void calibrate_run_measurement_overhead(void)
+static void calibrate_run_measurement_overhead(struct perf_sched *sched)
 {
 	u64 T0, T1, delta, min_delta = 1000000000ULL;
 	int i;
 
 	for (i = 0; i < 10; i++) {
 		T0 = get_nsecs();
-		burn_nsecs(0);
+		burn_nsecs(sched, 0);
 		T1 = get_nsecs();
 		delta = T1-T0;
 		min_delta = min(min_delta, delta);
 	}
-	run_measurement_overhead = min_delta;
+	sched->run_measurement_overhead = min_delta;
 
 	printf("run measurement overhead: %" PRIu64 " nsecs\n", min_delta);
 }
 
-static void calibrate_sleep_measurement_overhead(void)
+static void calibrate_sleep_measurement_overhead(struct perf_sched *sched)
 {
 	u64 T0, T1, delta, min_delta = 1000000000ULL;
 	int i;
@@ -213,7 +229,7 @@
 		min_delta = min(min_delta, delta);
 	}
 	min_delta -= 10000;
-	sleep_measurement_overhead = min_delta;
+	sched->sleep_measurement_overhead = min_delta;
 
 	printf("sleep measurement overhead: %" PRIu64 " nsecs\n", min_delta);
 }
@@ -246,8 +262,8 @@
 	return task->atoms[task->nr_events - 1];
 }
 
-static void
-add_sched_event_run(struct task_desc *task, u64 timestamp, u64 duration)
+static void add_sched_event_run(struct perf_sched *sched, struct task_desc *task,
+				u64 timestamp, u64 duration)
 {
 	struct sched_atom *event, *curr_event = last_event(task);
 
@@ -256,7 +272,7 @@
 	 * to it:
 	 */
 	if (curr_event && curr_event->type == SCHED_EVENT_RUN) {
-		nr_run_events_optimized++;
+		sched->nr_run_events_optimized++;
 		curr_event->duration += duration;
 		return;
 	}
@@ -266,12 +282,11 @@
 	event->type = SCHED_EVENT_RUN;
 	event->duration = duration;
 
-	nr_run_events++;
+	sched->nr_run_events++;
 }
 
-static void
-add_sched_event_wakeup(struct task_desc *task, u64 timestamp,
-		       struct task_desc *wakee)
+static void add_sched_event_wakeup(struct perf_sched *sched, struct task_desc *task,
+				   u64 timestamp, struct task_desc *wakee)
 {
 	struct sched_atom *event, *wakee_event;
 
@@ -281,11 +296,11 @@
 
 	wakee_event = last_event(wakee);
 	if (!wakee_event || wakee_event->type != SCHED_EVENT_SLEEP) {
-		targetless_wakeups++;
+		sched->targetless_wakeups++;
 		return;
 	}
 	if (wakee_event->wait_sem) {
-		multitarget_wakeups++;
+		sched->multitarget_wakeups++;
 		return;
 	}
 
@@ -294,89 +309,89 @@
 	wakee_event->specific_wait = 1;
 	event->wait_sem = wakee_event->wait_sem;
 
-	nr_wakeup_events++;
+	sched->nr_wakeup_events++;
 }
 
-static void
-add_sched_event_sleep(struct task_desc *task, u64 timestamp,
-		      u64 task_state __used)
+static void add_sched_event_sleep(struct perf_sched *sched, struct task_desc *task,
+				  u64 timestamp, u64 task_state __maybe_unused)
 {
 	struct sched_atom *event = get_new_event(task, timestamp);
 
 	event->type = SCHED_EVENT_SLEEP;
 
-	nr_sleep_events++;
+	sched->nr_sleep_events++;
 }
 
-static struct task_desc *register_pid(unsigned long pid, const char *comm)
+static struct task_desc *register_pid(struct perf_sched *sched,
+				      unsigned long pid, const char *comm)
 {
 	struct task_desc *task;
 
 	BUG_ON(pid >= MAX_PID);
 
-	task = pid_to_task[pid];
+	task = sched->pid_to_task[pid];
 
 	if (task)
 		return task;
 
 	task = zalloc(sizeof(*task));
 	task->pid = pid;
-	task->nr = nr_tasks;
+	task->nr = sched->nr_tasks;
 	strcpy(task->comm, comm);
 	/*
 	 * every task starts in sleeping state - this gets ignored
 	 * if there's no wakeup pointing to this sleep state:
 	 */
-	add_sched_event_sleep(task, 0, 0);
+	add_sched_event_sleep(sched, task, 0, 0);
 
-	pid_to_task[pid] = task;
-	nr_tasks++;
-	tasks = realloc(tasks, nr_tasks*sizeof(struct task_task *));
-	BUG_ON(!tasks);
-	tasks[task->nr] = task;
+	sched->pid_to_task[pid] = task;
+	sched->nr_tasks++;
+	sched->tasks = realloc(sched->tasks, sched->nr_tasks * sizeof(struct task_desc *));
+	BUG_ON(!sched->tasks);
+	sched->tasks[task->nr] = task;
 
 	if (verbose)
-		printf("registered task #%ld, PID %ld (%s)\n", nr_tasks, pid, comm);
+		printf("registered task #%ld, PID %ld (%s)\n", sched->nr_tasks, pid, comm);
 
 	return task;
 }
 
 
-static void print_task_traces(void)
+static void print_task_traces(struct perf_sched *sched)
 {
 	struct task_desc *task;
 	unsigned long i;
 
-	for (i = 0; i < nr_tasks; i++) {
-		task = tasks[i];
+	for (i = 0; i < sched->nr_tasks; i++) {
+		task = sched->tasks[i];
 		printf("task %6ld (%20s:%10ld), nr_events: %ld\n",
 			task->nr, task->comm, task->pid, task->nr_events);
 	}
 }
 
-static void add_cross_task_wakeups(void)
+static void add_cross_task_wakeups(struct perf_sched *sched)
 {
 	struct task_desc *task1, *task2;
 	unsigned long i, j;
 
-	for (i = 0; i < nr_tasks; i++) {
-		task1 = tasks[i];
+	for (i = 0; i < sched->nr_tasks; i++) {
+		task1 = sched->tasks[i];
 		j = i + 1;
-		if (j == nr_tasks)
+		if (j == sched->nr_tasks)
 			j = 0;
-		task2 = tasks[j];
-		add_sched_event_wakeup(task1, 0, task2);
+		task2 = sched->tasks[j];
+		add_sched_event_wakeup(sched, task1, 0, task2);
 	}
 }
 
-static void
-process_sched_event(struct task_desc *this_task __used, struct sched_atom *atom)
+static void perf_sched__process_event(struct perf_sched *sched,
+				      struct sched_atom *atom)
 {
 	int ret = 0;
 
 	switch (atom->type) {
 		case SCHED_EVENT_RUN:
-			burn_nsecs(atom->duration);
+			burn_nsecs(sched, atom->duration);
 			break;
 		case SCHED_EVENT_SLEEP:
 			if (atom->wait_sem)
@@ -423,8 +438,8 @@
 	fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
 
 	if (fd < 0)
-		die("Error: sys_perf_event_open() syscall returned"
-		    "with %d (%s)\n", fd, strerror(errno));
+		pr_err("Error: sys_perf_event_open() syscall returned "
+		       "with %d (%s)\n", fd, strerror(errno));
 	return fd;
 }
 
@@ -439,31 +454,41 @@
 	return runtime;
 }
 
+struct sched_thread_parms {
+	struct task_desc  *task;
+	struct perf_sched *sched;
+};
+
 static void *thread_func(void *ctx)
 {
-	struct task_desc *this_task = ctx;
+	struct sched_thread_parms *parms = ctx;
+	struct task_desc *this_task = parms->task;
+	struct perf_sched *sched = parms->sched;
 	u64 cpu_usage_0, cpu_usage_1;
 	unsigned long i, ret;
 	char comm2[22];
 	int fd;
 
+	free(parms);
+
 	sprintf(comm2, ":%s", this_task->comm);
 	prctl(PR_SET_NAME, comm2);
 	fd = self_open_counters();
-
+	if (fd < 0)
+		return NULL;
 again:
 	ret = sem_post(&this_task->ready_for_work);
 	BUG_ON(ret);
-	ret = pthread_mutex_lock(&start_work_mutex);
+	ret = pthread_mutex_lock(&sched->start_work_mutex);
 	BUG_ON(ret);
-	ret = pthread_mutex_unlock(&start_work_mutex);
+	ret = pthread_mutex_unlock(&sched->start_work_mutex);
 	BUG_ON(ret);
 
 	cpu_usage_0 = get_cpu_usage_nsec_self(fd);
 
 	for (i = 0; i < this_task->nr_events; i++) {
 		this_task->curr_event = i;
-		process_sched_event(this_task, this_task->atoms[i]);
+		perf_sched__process_event(sched, this_task->atoms[i]);
 	}
 
 	cpu_usage_1 = get_cpu_usage_nsec_self(fd);
@@ -471,15 +496,15 @@
 	ret = sem_post(&this_task->work_done_sem);
 	BUG_ON(ret);
 
-	ret = pthread_mutex_lock(&work_done_wait_mutex);
+	ret = pthread_mutex_lock(&sched->work_done_wait_mutex);
 	BUG_ON(ret);
-	ret = pthread_mutex_unlock(&work_done_wait_mutex);
+	ret = pthread_mutex_unlock(&sched->work_done_wait_mutex);
 	BUG_ON(ret);
 
 	goto again;
 }
 
-static void create_tasks(void)
+static void create_tasks(struct perf_sched *sched)
 {
 	struct task_desc *task;
 	pthread_attr_t attr;
@@ -491,128 +516,129 @@
 	err = pthread_attr_setstacksize(&attr,
 			(size_t) max(16 * 1024, PTHREAD_STACK_MIN));
 	BUG_ON(err);
-	err = pthread_mutex_lock(&start_work_mutex);
+	err = pthread_mutex_lock(&sched->start_work_mutex);
 	BUG_ON(err);
-	err = pthread_mutex_lock(&work_done_wait_mutex);
+	err = pthread_mutex_lock(&sched->work_done_wait_mutex);
 	BUG_ON(err);
-	for (i = 0; i < nr_tasks; i++) {
-		task = tasks[i];
+	for (i = 0; i < sched->nr_tasks; i++) {
+		struct sched_thread_parms *parms = malloc(sizeof(*parms));
+		BUG_ON(parms == NULL);
+		parms->task = task = sched->tasks[i];
+		parms->sched = sched;
 		sem_init(&task->sleep_sem, 0, 0);
 		sem_init(&task->ready_for_work, 0, 0);
 		sem_init(&task->work_done_sem, 0, 0);
 		task->curr_event = 0;
-		err = pthread_create(&task->thread, &attr, thread_func, task);
+		err = pthread_create(&task->thread, &attr, thread_func, parms);
 		BUG_ON(err);
 	}
 }
 
-static void wait_for_tasks(void)
+static void wait_for_tasks(struct perf_sched *sched)
 {
 	u64 cpu_usage_0, cpu_usage_1;
 	struct task_desc *task;
 	unsigned long i, ret;
 
-	start_time = get_nsecs();
-	cpu_usage = 0;
-	pthread_mutex_unlock(&work_done_wait_mutex);
+	sched->start_time = get_nsecs();
+	sched->cpu_usage = 0;
+	pthread_mutex_unlock(&sched->work_done_wait_mutex);
 
-	for (i = 0; i < nr_tasks; i++) {
-		task = tasks[i];
+	for (i = 0; i < sched->nr_tasks; i++) {
+		task = sched->tasks[i];
 		ret = sem_wait(&task->ready_for_work);
 		BUG_ON(ret);
 		sem_init(&task->ready_for_work, 0, 0);
 	}
-	ret = pthread_mutex_lock(&work_done_wait_mutex);
+	ret = pthread_mutex_lock(&sched->work_done_wait_mutex);
 	BUG_ON(ret);
 
 	cpu_usage_0 = get_cpu_usage_nsec_parent();
 
-	pthread_mutex_unlock(&start_work_mutex);
+	pthread_mutex_unlock(&sched->start_work_mutex);
 
-	for (i = 0; i < nr_tasks; i++) {
-		task = tasks[i];
+	for (i = 0; i < sched->nr_tasks; i++) {
+		task = sched->tasks[i];
 		ret = sem_wait(&task->work_done_sem);
 		BUG_ON(ret);
 		sem_init(&task->work_done_sem, 0, 0);
-		cpu_usage += task->cpu_usage;
+		sched->cpu_usage += task->cpu_usage;
 		task->cpu_usage = 0;
 	}
 
 	cpu_usage_1 = get_cpu_usage_nsec_parent();
-	if (!runavg_cpu_usage)
-		runavg_cpu_usage = cpu_usage;
-	runavg_cpu_usage = (runavg_cpu_usage*9 + cpu_usage)/10;
+	if (!sched->runavg_cpu_usage)
+		sched->runavg_cpu_usage = sched->cpu_usage;
+	sched->runavg_cpu_usage = (sched->runavg_cpu_usage * 9 + sched->cpu_usage) / 10;
 
-	parent_cpu_usage = cpu_usage_1 - cpu_usage_0;
-	if (!runavg_parent_cpu_usage)
-		runavg_parent_cpu_usage = parent_cpu_usage;
-	runavg_parent_cpu_usage = (runavg_parent_cpu_usage*9 +
-				   parent_cpu_usage)/10;
+	sched->parent_cpu_usage = cpu_usage_1 - cpu_usage_0;
+	if (!sched->runavg_parent_cpu_usage)
+		sched->runavg_parent_cpu_usage = sched->parent_cpu_usage;
+	sched->runavg_parent_cpu_usage = (sched->runavg_parent_cpu_usage * 9 +
+					 sched->parent_cpu_usage)/10;
 
-	ret = pthread_mutex_lock(&start_work_mutex);
+	ret = pthread_mutex_lock(&sched->start_work_mutex);
 	BUG_ON(ret);
 
-	for (i = 0; i < nr_tasks; i++) {
-		task = tasks[i];
+	for (i = 0; i < sched->nr_tasks; i++) {
+		task = sched->tasks[i];
 		sem_init(&task->sleep_sem, 0, 0);
 		task->curr_event = 0;
 	}
 }
 
-static void run_one_test(void)
+static void run_one_test(struct perf_sched *sched)
 {
 	u64 T0, T1, delta, avg_delta, fluct;
 
 	T0 = get_nsecs();
-	wait_for_tasks();
+	wait_for_tasks(sched);
 	T1 = get_nsecs();
 
 	delta = T1 - T0;
-	sum_runtime += delta;
-	nr_runs++;
+	sched->sum_runtime += delta;
+	sched->nr_runs++;
 
-	avg_delta = sum_runtime / nr_runs;
+	avg_delta = sched->sum_runtime / sched->nr_runs;
 	if (delta < avg_delta)
 		fluct = avg_delta - delta;
 	else
 		fluct = delta - avg_delta;
-	sum_fluct += fluct;
-	if (!run_avg)
-		run_avg = delta;
-	run_avg = (run_avg*9 + delta)/10;
+	sched->sum_fluct += fluct;
+	if (!sched->run_avg)
+		sched->run_avg = delta;
+	sched->run_avg = (sched->run_avg * 9 + delta) / 10;
 
-	printf("#%-3ld: %0.3f, ",
-		nr_runs, (double)delta/1000000.0);
+	printf("#%-3ld: %0.3f, ", sched->nr_runs, (double)delta / 1000000.0);
 
-	printf("ravg: %0.2f, ",
-		(double)run_avg/1e6);
+	printf("ravg: %0.2f, ", (double)sched->run_avg / 1e6);
 
 	printf("cpu: %0.2f / %0.2f",
-		(double)cpu_usage/1e6, (double)runavg_cpu_usage/1e6);
+		(double)sched->cpu_usage / 1e6, (double)sched->runavg_cpu_usage / 1e6);
 
 #if 0
 	/*
 	 * rusage statistics done by the parent, these are less
-	 * accurate than the sum_exec_runtime based statistics:
+	 * accurate than the sched->sum_exec_runtime based statistics:
 	 */
 	printf(" [%0.2f / %0.2f]",
-		(double)parent_cpu_usage/1e6,
-		(double)runavg_parent_cpu_usage/1e6);
+		(double)sched->parent_cpu_usage/1e6,
+		(double)sched->runavg_parent_cpu_usage/1e6);
 #endif
 
 	printf("\n");
 
-	if (nr_sleep_corrections)
-		printf(" (%ld sleep corrections)\n", nr_sleep_corrections);
-	nr_sleep_corrections = 0;
+	if (sched->nr_sleep_corrections)
+		printf(" (%ld sleep corrections)\n", sched->nr_sleep_corrections);
+	sched->nr_sleep_corrections = 0;
 }
 
-static void test_calibrations(void)
+static void test_calibrations(struct perf_sched *sched)
 {
 	u64 T0, T1;
 
 	T0 = get_nsecs();
-	burn_nsecs(1e6);
+	burn_nsecs(sched, 1e6);
 	T1 = get_nsecs();
 
 	printf("the run test took %" PRIu64 " nsecs\n", T1 - T0);
@@ -624,216 +650,92 @@
 	printf("the sleep test took %" PRIu64 " nsecs\n", T1 - T0);
 }
 
-#define FILL_FIELD(ptr, field, event, data)	\
-	ptr.field = (typeof(ptr.field)) raw_field_value(event, #field, data)
-
-#define FILL_ARRAY(ptr, array, event, data)			\
-do {								\
-	void *__array = raw_field_ptr(event, #array, data);	\
-	memcpy(ptr.array, __array, sizeof(ptr.array));	\
-} while(0)
-
-#define FILL_COMMON_FIELDS(ptr, event, data)			\
-do {								\
-	FILL_FIELD(ptr, common_type, event, data);		\
-	FILL_FIELD(ptr, common_flags, event, data);		\
-	FILL_FIELD(ptr, common_preempt_count, event, data);	\
-	FILL_FIELD(ptr, common_pid, event, data);		\
-	FILL_FIELD(ptr, common_tgid, event, data);		\
-} while (0)
-
-
-
-struct trace_switch_event {
-	u32 size;
-
-	u16 common_type;
-	u8 common_flags;
-	u8 common_preempt_count;
-	u32 common_pid;
-	u32 common_tgid;
-
-	char prev_comm[16];
-	u32 prev_pid;
-	u32 prev_prio;
-	u64 prev_state;
-	char next_comm[16];
-	u32 next_pid;
-	u32 next_prio;
-};
-
-struct trace_runtime_event {
-	u32 size;
-
-	u16 common_type;
-	u8 common_flags;
-	u8 common_preempt_count;
-	u32 common_pid;
-	u32 common_tgid;
-
-	char comm[16];
-	u32 pid;
-	u64 runtime;
-	u64 vruntime;
-};
-
-struct trace_wakeup_event {
-	u32 size;
-
-	u16 common_type;
-	u8 common_flags;
-	u8 common_preempt_count;
-	u32 common_pid;
-	u32 common_tgid;
-
-	char comm[16];
-	u32 pid;
-
-	u32 prio;
-	u32 success;
-	u32 cpu;
-};
-
-struct trace_fork_event {
-	u32 size;
-
-	u16 common_type;
-	u8 common_flags;
-	u8 common_preempt_count;
-	u32 common_pid;
-	u32 common_tgid;
-
-	char parent_comm[16];
-	u32 parent_pid;
-	char child_comm[16];
-	u32 child_pid;
-};
-
-struct trace_migrate_task_event {
-	u32 size;
-
-	u16 common_type;
-	u8 common_flags;
-	u8 common_preempt_count;
-	u32 common_pid;
-	u32 common_tgid;
-
-	char comm[16];
-	u32 pid;
-
-	u32 prio;
-	u32 cpu;
-};
-
-struct trace_sched_handler {
-	void (*switch_event)(struct trace_switch_event *,
-			     struct machine *,
-			     struct event_format *,
-			     struct perf_sample *sample);
-
-	void (*runtime_event)(struct trace_runtime_event *,
-			      struct machine *,
-			      struct perf_sample *sample);
-
-	void (*wakeup_event)(struct trace_wakeup_event *,
-			     struct machine *,
-			     struct event_format *,
-			     struct perf_sample *sample);
-
-	void (*fork_event)(struct trace_fork_event *,
-			   struct event_format *event);
-
-	void (*migrate_task_event)(struct trace_migrate_task_event *,
-				   struct machine *machine,
-				   struct perf_sample *sample);
-};
-
-
-static void
-replay_wakeup_event(struct trace_wakeup_event *wakeup_event,
-		    struct machine *machine __used,
-		    struct event_format *event, struct perf_sample *sample)
+static int
+replay_wakeup_event(struct perf_sched *sched,
+		    struct perf_evsel *evsel, struct perf_sample *sample,
+		    struct machine *machine __maybe_unused)
 {
+	const char *comm = perf_evsel__strval(evsel, sample, "comm");
+	const u32 pid	 = perf_evsel__intval(evsel, sample, "pid");
 	struct task_desc *waker, *wakee;
 
 	if (verbose) {
-		printf("sched_wakeup event %p\n", event);
+		printf("sched_wakeup event %p\n", evsel);
 
-		printf(" ... pid %d woke up %s/%d\n",
-			wakeup_event->common_pid,
-			wakeup_event->comm,
-			wakeup_event->pid);
+		printf(" ... pid %d woke up %s/%d\n", sample->tid, comm, pid);
 	}
 
-	waker = register_pid(wakeup_event->common_pid, "<unknown>");
-	wakee = register_pid(wakeup_event->pid, wakeup_event->comm);
+	waker = register_pid(sched, sample->tid, "<unknown>");
+	wakee = register_pid(sched, pid, comm);
 
-	add_sched_event_wakeup(waker, sample->time, wakee);
+	add_sched_event_wakeup(sched, waker, sample->time, wakee);
+	return 0;
 }
 
-static u64 cpu_last_switched[MAX_CPUS];
-
-static void
-replay_switch_event(struct trace_switch_event *switch_event,
-		    struct machine *machine __used,
-		    struct event_format *event,
-		    struct perf_sample *sample)
+static int replay_switch_event(struct perf_sched *sched,
+			       struct perf_evsel *evsel,
+			       struct perf_sample *sample,
+			       struct machine *machine __maybe_unused)
 {
-	struct task_desc *prev, __used *next;
+	const char *prev_comm  = perf_evsel__strval(evsel, sample, "prev_comm"),
+		   *next_comm  = perf_evsel__strval(evsel, sample, "next_comm");
+	const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"),
+		  next_pid = perf_evsel__intval(evsel, sample, "next_pid");
+	const u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state");
+	struct task_desc *prev, __maybe_unused *next;
 	u64 timestamp0, timestamp = sample->time;
 	int cpu = sample->cpu;
 	s64 delta;
 
 	if (verbose)
-		printf("sched_switch event %p\n", event);
+		printf("sched_switch event %p\n", evsel);
 
 	if (cpu >= MAX_CPUS || cpu < 0)
-		return;
+		return 0;
 
-	timestamp0 = cpu_last_switched[cpu];
+	timestamp0 = sched->cpu_last_switched[cpu];
 	if (timestamp0)
 		delta = timestamp - timestamp0;
 	else
 		delta = 0;
 
-	if (delta < 0)
-		die("hm, delta: %" PRIu64 " < 0 ?\n", delta);
-
-	if (verbose) {
-		printf(" ... switch from %s/%d to %s/%d [ran %" PRIu64 " nsecs]\n",
-			switch_event->prev_comm, switch_event->prev_pid,
-			switch_event->next_comm, switch_event->next_pid,
-			delta);
+	if (delta < 0) {
+		pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta);
+		return -1;
 	}
 
-	prev = register_pid(switch_event->prev_pid, switch_event->prev_comm);
-	next = register_pid(switch_event->next_pid, switch_event->next_comm);
+	pr_debug(" ... switch from %s/%d to %s/%d [ran %" PRIu64 " nsecs]\n",
+		 prev_comm, prev_pid, next_comm, next_pid, delta);
 
-	cpu_last_switched[cpu] = timestamp;
+	prev = register_pid(sched, prev_pid, prev_comm);
+	next = register_pid(sched, next_pid, next_comm);
 
-	add_sched_event_run(prev, timestamp, delta);
-	add_sched_event_sleep(prev, timestamp, switch_event->prev_state);
+	sched->cpu_last_switched[cpu] = timestamp;
+
+	add_sched_event_run(sched, prev, timestamp, delta);
+	add_sched_event_sleep(sched, prev, timestamp, prev_state);
+
+	return 0;
 }
 
-
-static void
-replay_fork_event(struct trace_fork_event *fork_event,
-		  struct event_format *event)
+static int replay_fork_event(struct perf_sched *sched, struct perf_evsel *evsel,
+			     struct perf_sample *sample)
 {
-	if (verbose) {
-		printf("sched_fork event %p\n", event);
-		printf("... parent: %s/%d\n", fork_event->parent_comm, fork_event->parent_pid);
-		printf("...  child: %s/%d\n", fork_event->child_comm, fork_event->child_pid);
-	}
-	register_pid(fork_event->parent_pid, fork_event->parent_comm);
-	register_pid(fork_event->child_pid, fork_event->child_comm);
-}
+	const char *parent_comm = perf_evsel__strval(evsel, sample, "parent_comm"),
+		   *child_comm  = perf_evsel__strval(evsel, sample, "child_comm");
+	const u32 parent_pid  = perf_evsel__intval(evsel, sample, "parent_pid"),
+		  child_pid  = perf_evsel__intval(evsel, sample, "child_pid");
 
-static struct trace_sched_handler replay_ops  = {
-	.wakeup_event		= replay_wakeup_event,
-	.switch_event		= replay_switch_event,
-	.fork_event		= replay_fork_event,
-};
+	if (verbose) {
+		printf("sched_fork event %p\n", evsel);
+		printf("... parent: %s/%d\n", parent_comm, parent_pid);
+		printf("...  child: %s/%d\n", child_comm, child_pid);
+	}
+
+	register_pid(sched, parent_pid, parent_comm);
+	register_pid(sched, child_pid, child_comm);
+	return 0;
+}
 
 struct sort_dimension {
 	const char		*name;
@@ -841,8 +743,6 @@
 	struct list_head	list;
 };
 
-static LIST_HEAD(cmp_pid);
-
 static int
 thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *r)
 {
@@ -911,40 +811,45 @@
 	rb_insert_color(&data->node, root);
 }
 
-static void thread_atoms_insert(struct thread *thread)
+static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread)
 {
 	struct work_atoms *atoms = zalloc(sizeof(*atoms));
-	if (!atoms)
-		die("No memory");
+	if (!atoms) {
+		pr_err("No memory at %s\n", __func__);
+		return -1;
+	}
 
 	atoms->thread = thread;
 	INIT_LIST_HEAD(&atoms->work_list);
-	__thread_latency_insert(&atom_root, atoms, &cmp_pid);
+	__thread_latency_insert(&sched->atom_root, atoms, &sched->cmp_pid);
+	return 0;
 }
 
-static void
-latency_fork_event(struct trace_fork_event *fork_event __used,
-		   struct event_format *event __used)
+static int latency_fork_event(struct perf_sched *sched __maybe_unused,
+			      struct perf_evsel *evsel __maybe_unused,
+			      struct perf_sample *sample __maybe_unused)
 {
 	/* should insert the newcomer */
+	return 0;
 }
 
-__used
-static char sched_out_state(struct trace_switch_event *switch_event)
+static char sched_out_state(u64 prev_state)
 {
 	const char *str = TASK_STATE_TO_CHAR_STR;
 
-	return str[switch_event->prev_state];
+	return str[prev_state];
 }
 
-static void
+static int
 add_sched_out_event(struct work_atoms *atoms,
 		    char run_state,
 		    u64 timestamp)
 {
 	struct work_atom *atom = zalloc(sizeof(*atom));
-	if (!atom)
-		die("Non memory");
+	if (!atom) {
+		pr_err("No memory at %s\n", __func__);
+		return -1;
+	}
 
 	atom->sched_out_time = timestamp;
 
@@ -954,10 +859,12 @@
 	}
 
 	list_add_tail(&atom->list, &atoms->work_list);
+	return 0;
 }
 
 static void
-add_runtime_event(struct work_atoms *atoms, u64 delta, u64 timestamp __used)
+add_runtime_event(struct work_atoms *atoms, u64 delta,
+		  u64 timestamp __maybe_unused)
 {
 	struct work_atom *atom;
 
@@ -1000,12 +907,14 @@
 	atoms->nb_atoms++;
 }
 
-static void
-latency_switch_event(struct trace_switch_event *switch_event,
-		     struct machine *machine,
-		     struct event_format *event __used,
-		     struct perf_sample *sample)
+static int latency_switch_event(struct perf_sched *sched,
+				struct perf_evsel *evsel,
+				struct perf_sample *sample,
+				struct machine *machine)
 {
+	const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"),
+		  next_pid = perf_evsel__intval(evsel, sample, "next_pid");
+	const u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state");
 	struct work_atoms *out_events, *in_events;
 	struct thread *sched_out, *sched_in;
 	u64 timestamp0, timestamp = sample->time;
@@ -1014,87 +923,112 @@
 
 	BUG_ON(cpu >= MAX_CPUS || cpu < 0);
 
-	timestamp0 = cpu_last_switched[cpu];
-	cpu_last_switched[cpu] = timestamp;
+	timestamp0 = sched->cpu_last_switched[cpu];
+	sched->cpu_last_switched[cpu] = timestamp;
 	if (timestamp0)
 		delta = timestamp - timestamp0;
 	else
 		delta = 0;
 
-	if (delta < 0)
-		die("hm, delta: %" PRIu64 " < 0 ?\n", delta);
-
-
-	sched_out = machine__findnew_thread(machine, switch_event->prev_pid);
-	sched_in = machine__findnew_thread(machine, switch_event->next_pid);
-
-	out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
-	if (!out_events) {
-		thread_atoms_insert(sched_out);
-		out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
-		if (!out_events)
-			die("out-event: Internal tree error");
+	if (delta < 0) {
+		pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta);
+		return -1;
 	}
-	add_sched_out_event(out_events, sched_out_state(switch_event), timestamp);
 
-	in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid);
+	sched_out = machine__findnew_thread(machine, prev_pid);
+	sched_in = machine__findnew_thread(machine, next_pid);
+
+	out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid);
+	if (!out_events) {
+		if (thread_atoms_insert(sched, sched_out))
+			return -1;
+		out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid);
+		if (!out_events) {
+			pr_err("out-event: Internal tree error");
+			return -1;
+		}
+	}
+	if (add_sched_out_event(out_events, sched_out_state(prev_state), timestamp))
+		return -1;
+
+	in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid);
 	if (!in_events) {
-		thread_atoms_insert(sched_in);
-		in_events = thread_atoms_search(&atom_root, sched_in, &cmp_pid);
-		if (!in_events)
-			die("in-event: Internal tree error");
+		if (thread_atoms_insert(sched, sched_in))
+			return -1;
+		in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid);
+		if (!in_events) {
+			pr_err("in-event: Internal tree error");
+			return -1;
+		}
 		/*
 		 * Take came in we have not heard about yet,
 		 * add in an initial atom in runnable state:
 		 */
-		add_sched_out_event(in_events, 'R', timestamp);
+		if (add_sched_out_event(in_events, 'R', timestamp))
+			return -1;
 	}
 	add_sched_in_event(in_events, timestamp);
+
+	return 0;
 }
 
-static void
-latency_runtime_event(struct trace_runtime_event *runtime_event,
-		      struct machine *machine, struct perf_sample *sample)
+static int latency_runtime_event(struct perf_sched *sched,
+				 struct perf_evsel *evsel,
+				 struct perf_sample *sample,
+				 struct machine *machine)
 {
-	struct thread *thread = machine__findnew_thread(machine, runtime_event->pid);
-	struct work_atoms *atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
+	const u32 pid	   = perf_evsel__intval(evsel, sample, "pid");
+	const u64 runtime  = perf_evsel__intval(evsel, sample, "runtime");
+	struct thread *thread = machine__findnew_thread(machine, pid);
+	struct work_atoms *atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid);
 	u64 timestamp = sample->time;
 	int cpu = sample->cpu;
 
 	BUG_ON(cpu >= MAX_CPUS || cpu < 0);
 	if (!atoms) {
-		thread_atoms_insert(thread);
-		atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
-		if (!atoms)
-			die("in-event: Internal tree error");
-		add_sched_out_event(atoms, 'R', timestamp);
+		if (thread_atoms_insert(sched, thread))
+			return -1;
+		atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid);
+		if (!atoms) {
+			pr_err("in-event: Internal tree error");
+			return -1;
+		}
+		if (add_sched_out_event(atoms, 'R', timestamp))
+			return -1;
 	}
 
-	add_runtime_event(atoms, runtime_event->runtime, timestamp);
+	add_runtime_event(atoms, runtime, timestamp);
+	return 0;
 }
 
-static void
-latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
-		     struct machine *machine, struct event_format *event __used,
-		     struct perf_sample *sample)
+static int latency_wakeup_event(struct perf_sched *sched,
+				struct perf_evsel *evsel,
+				struct perf_sample *sample,
+				struct machine *machine)
 {
+	const u32 pid	  = perf_evsel__intval(evsel, sample, "pid"),
+		  success = perf_evsel__intval(evsel, sample, "success");
 	struct work_atoms *atoms;
 	struct work_atom *atom;
 	struct thread *wakee;
 	u64 timestamp = sample->time;
 
 	/* Note for later, it may be interesting to observe the failing cases */
-	if (!wakeup_event->success)
-		return;
+	if (!success)
+		return 0;
 
-	wakee = machine__findnew_thread(machine, wakeup_event->pid);
-	atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid);
+	wakee = machine__findnew_thread(machine, pid);
+	atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid);
 	if (!atoms) {
-		thread_atoms_insert(wakee);
-		atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid);
-		if (!atoms)
-			die("wakeup-event: Internal tree error");
-		add_sched_out_event(atoms, 'S', timestamp);
+		if (thread_atoms_insert(sched, wakee))
+			return -1;
+		atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid);
+		if (!atoms) {
+			pr_err("wakeup-event: Internal tree error");
+			return -1;
+		}
+		if (add_sched_out_event(atoms, 'S', timestamp))
+			return -1;
 	}
 
 	BUG_ON(list_empty(&atoms->work_list));
@@ -1106,23 +1040,26 @@
 	 * one CPU, or are only looking at only one, so don't
 	 * make useless noise.
 	 */
-	if (profile_cpu == -1 && atom->state != THREAD_SLEEPING)
-		nr_state_machine_bugs++;
+	if (sched->profile_cpu == -1 && atom->state != THREAD_SLEEPING)
+		sched->nr_state_machine_bugs++;
 
-	nr_timestamps++;
+	sched->nr_timestamps++;
 	if (atom->sched_out_time > timestamp) {
-		nr_unordered_timestamps++;
-		return;
+		sched->nr_unordered_timestamps++;
+		return 0;
 	}
 
 	atom->state = THREAD_WAIT_CPU;
 	atom->wake_up_time = timestamp;
+	return 0;
 }
 
-static void
-latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event,
-			   struct machine *machine, struct perf_sample *sample)
+static int latency_migrate_task_event(struct perf_sched *sched,
+				      struct perf_evsel *evsel,
+				      struct perf_sample *sample,
+				      struct machine *machine)
 {
+	const u32 pid = perf_evsel__intval(evsel, sample, "pid");
 	u64 timestamp = sample->time;
 	struct work_atoms *atoms;
 	struct work_atom *atom;
@@ -1131,18 +1068,22 @@
 	/*
 	 * Only need to worry about migration when profiling one CPU.
 	 */
-	if (profile_cpu == -1)
-		return;
+	if (sched->profile_cpu == -1)
+		return 0;
 
-	migrant = machine__findnew_thread(machine, migrate_task_event->pid);
-	atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid);
+	migrant = machine__findnew_thread(machine, pid);
+	atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid);
 	if (!atoms) {
-		thread_atoms_insert(migrant);
-		register_pid(migrant->pid, migrant->comm);
-		atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid);
-		if (!atoms)
-			die("migration-event: Internal tree error");
-		add_sched_out_event(atoms, 'R', timestamp);
+		if (thread_atoms_insert(sched, migrant))
+			return -1;
+		register_pid(sched, migrant->pid, migrant->comm);
+		atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid);
+		if (!atoms) {
+			pr_err("migration-event: Internal tree error");
+			return -1;
+		}
+		if (add_sched_out_event(atoms, 'R', timestamp))
+			return -1;
 	}
 
 	BUG_ON(list_empty(&atoms->work_list));
@@ -1150,21 +1091,15 @@
 	atom = list_entry(atoms->work_list.prev, struct work_atom, list);
 	atom->sched_in_time = atom->sched_out_time = atom->wake_up_time = timestamp;
 
-	nr_timestamps++;
+	sched->nr_timestamps++;
 
 	if (atom->sched_out_time > timestamp)
-		nr_unordered_timestamps++;
+		sched->nr_unordered_timestamps++;
+
+	return 0;
 }
 
-static struct trace_sched_handler lat_ops  = {
-	.wakeup_event		= latency_wakeup_event,
-	.switch_event		= latency_switch_event,
-	.runtime_event		= latency_runtime_event,
-	.fork_event		= latency_fork_event,
-	.migrate_task_event	= latency_migrate_task_event,
-};
-
-static void output_lat_thread(struct work_atoms *work_list)
+static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_list)
 {
 	int i;
 	int ret;
@@ -1178,8 +1113,8 @@
 	if (!strcmp(work_list->thread->comm, "swapper"))
 		return;
 
-	all_runtime += work_list->total_runtime;
-	all_count += work_list->nb_atoms;
+	sched->all_runtime += work_list->total_runtime;
+	sched->all_count   += work_list->nb_atoms;
 
 	ret = printf("  %s:%d ", work_list->thread->comm, work_list->thread->pid);
 
@@ -1205,11 +1140,6 @@
 	return 0;
 }
 
-static struct sort_dimension pid_sort_dimension = {
-	.name			= "pid",
-	.cmp			= pid_cmp,
-};
-
 static int avg_cmp(struct work_atoms *l, struct work_atoms *r)
 {
 	u64 avgl, avgr;
@@ -1231,11 +1161,6 @@
 	return 0;
 }
 
-static struct sort_dimension avg_sort_dimension = {
-	.name			= "avg",
-	.cmp			= avg_cmp,
-};
-
 static int max_cmp(struct work_atoms *l, struct work_atoms *r)
 {
 	if (l->max_lat < r->max_lat)
@@ -1246,11 +1171,6 @@
 	return 0;
 }
 
-static struct sort_dimension max_sort_dimension = {
-	.name			= "max",
-	.cmp			= max_cmp,
-};
-
 static int switch_cmp(struct work_atoms *l, struct work_atoms *r)
 {
 	if (l->nb_atoms < r->nb_atoms)
@@ -1261,11 +1181,6 @@
 	return 0;
 }
 
-static struct sort_dimension switch_sort_dimension = {
-	.name			= "switch",
-	.cmp			= switch_cmp,
-};
-
 static int runtime_cmp(struct work_atoms *l, struct work_atoms *r)
 {
 	if (l->total_runtime < r->total_runtime)
@@ -1276,28 +1191,38 @@
 	return 0;
 }
 
-static struct sort_dimension runtime_sort_dimension = {
-	.name			= "runtime",
-	.cmp			= runtime_cmp,
-};
-
-static struct sort_dimension *available_sorts[] = {
-	&pid_sort_dimension,
-	&avg_sort_dimension,
-	&max_sort_dimension,
-	&switch_sort_dimension,
-	&runtime_sort_dimension,
-};
-
-#define NB_AVAILABLE_SORTS	(int)(sizeof(available_sorts) / sizeof(struct sort_dimension *))
-
-static LIST_HEAD(sort_list);
-
 static int sort_dimension__add(const char *tok, struct list_head *list)
 {
-	int i;
+	size_t i;
+	static struct sort_dimension avg_sort_dimension = {
+		.name = "avg",
+		.cmp  = avg_cmp,
+	};
+	static struct sort_dimension max_sort_dimension = {
+		.name = "max",
+		.cmp  = max_cmp,
+	};
+	static struct sort_dimension pid_sort_dimension = {
+		.name = "pid",
+		.cmp  = pid_cmp,
+	};
+	static struct sort_dimension runtime_sort_dimension = {
+		.name = "runtime",
+		.cmp  = runtime_cmp,
+	};
+	static struct sort_dimension switch_sort_dimension = {
+		.name = "switch",
+		.cmp  = switch_cmp,
+	};
+	struct sort_dimension *available_sorts[] = {
+		&pid_sort_dimension,
+		&avg_sort_dimension,
+		&max_sort_dimension,
+		&switch_sort_dimension,
+		&runtime_sort_dimension,
+	};
 
-	for (i = 0; i < NB_AVAILABLE_SORTS; i++) {
+	for (i = 0; i < ARRAY_SIZE(available_sorts); i++) {
 		if (!strcmp(available_sorts[i]->name, tok)) {
 			list_add_tail(&available_sorts[i]->list, list);
 
@@ -1308,68 +1233,41 @@
 	return -1;
 }
 
-static void setup_sorting(void);
-
-static void sort_lat(void)
+static void perf_sched__sort_lat(struct perf_sched *sched)
 {
 	struct rb_node *node;
 
 	for (;;) {
 		struct work_atoms *data;
-		node = rb_first(&atom_root);
+		node = rb_first(&sched->atom_root);
 		if (!node)
 			break;
 
-		rb_erase(node, &atom_root);
+		rb_erase(node, &sched->atom_root);
 		data = rb_entry(node, struct work_atoms, node);
-		__thread_latency_insert(&sorted_atom_root, data, &sort_list);
+		__thread_latency_insert(&sched->sorted_atom_root, data, &sched->sort_list);
 	}
 }
 
-static struct trace_sched_handler *trace_handler;
-
-static void
-process_sched_wakeup_event(struct perf_tool *tool __used,
-			   struct event_format *event,
-			   struct perf_sample *sample,
-			   struct machine *machine,
-			   struct thread *thread __used)
+static int process_sched_wakeup_event(struct perf_tool *tool,
+				      struct perf_evsel *evsel,
+				      struct perf_sample *sample,
+				      struct machine *machine)
 {
-	void *data = sample->raw_data;
-	struct trace_wakeup_event wakeup_event;
+	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
 
-	FILL_COMMON_FIELDS(wakeup_event, event, data);
+	if (sched->tp_handler->wakeup_event)
+		return sched->tp_handler->wakeup_event(sched, evsel, sample, machine);
 
-	FILL_ARRAY(wakeup_event, comm, event, data);
-	FILL_FIELD(wakeup_event, pid, event, data);
-	FILL_FIELD(wakeup_event, prio, event, data);
-	FILL_FIELD(wakeup_event, success, event, data);
-	FILL_FIELD(wakeup_event, cpu, event, data);
-
-	if (trace_handler->wakeup_event)
-		trace_handler->wakeup_event(&wakeup_event, machine, event, sample);
+	return 0;
 }
 
-/*
- * Track the current task - that way we can know whether there's any
- * weird events, such as a task being switched away that is not current.
- */
-static int max_cpu;
-
-static u32 curr_pid[MAX_CPUS] = { [0 ... MAX_CPUS-1] = -1 };
-
-static struct thread *curr_thread[MAX_CPUS];
-
-static char next_shortname1 = 'A';
-static char next_shortname2 = '0';
-
-static void
-map_switch_event(struct trace_switch_event *switch_event,
-		 struct machine *machine,
-		 struct event_format *event __used,
-		 struct perf_sample *sample)
+static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
+			    struct perf_sample *sample, struct machine *machine)
 {
-	struct thread *sched_out __used, *sched_in;
+	const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"),
+		  next_pid = perf_evsel__intval(evsel, sample, "next_pid");
+	struct thread *sched_out __maybe_unused, *sched_in;
 	int new_shortname;
 	u64 timestamp0, timestamp = sample->time;
 	s64 delta;
@@ -1377,54 +1275,55 @@
 
 	BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0);
 
-	if (this_cpu > max_cpu)
-		max_cpu = this_cpu;
+	if (this_cpu > sched->max_cpu)
+		sched->max_cpu = this_cpu;
 
-	timestamp0 = cpu_last_switched[this_cpu];
-	cpu_last_switched[this_cpu] = timestamp;
+	timestamp0 = sched->cpu_last_switched[this_cpu];
+	sched->cpu_last_switched[this_cpu] = timestamp;
 	if (timestamp0)
 		delta = timestamp - timestamp0;
 	else
 		delta = 0;
 
-	if (delta < 0)
-		die("hm, delta: %" PRIu64 " < 0 ?\n", delta);
+	if (delta < 0) {
+		pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta);
+		return -1;
+	}
 
+	sched_out = machine__findnew_thread(machine, prev_pid);
+	sched_in = machine__findnew_thread(machine, next_pid);
 
-	sched_out = machine__findnew_thread(machine, switch_event->prev_pid);
-	sched_in = machine__findnew_thread(machine, switch_event->next_pid);
-
-	curr_thread[this_cpu] = sched_in;
+	sched->curr_thread[this_cpu] = sched_in;
 
 	printf("  ");
 
 	new_shortname = 0;
 	if (!sched_in->shortname[0]) {
-		sched_in->shortname[0] = next_shortname1;
-		sched_in->shortname[1] = next_shortname2;
+		sched_in->shortname[0] = sched->next_shortname1;
+		sched_in->shortname[1] = sched->next_shortname2;
 
-		if (next_shortname1 < 'Z') {
-			next_shortname1++;
+		if (sched->next_shortname1 < 'Z') {
+			sched->next_shortname1++;
 		} else {
-			next_shortname1='A';
-			if (next_shortname2 < '9') {
-				next_shortname2++;
+			sched->next_shortname1='A';
+			if (sched->next_shortname2 < '9') {
+				sched->next_shortname2++;
 			} else {
-				next_shortname2='0';
+				sched->next_shortname2='0';
 			}
 		}
 		new_shortname = 1;
 	}
 
-	for (cpu = 0; cpu <= max_cpu; cpu++) {
+	for (cpu = 0; cpu <= sched->max_cpu; cpu++) {
 		if (cpu != this_cpu)
 			printf(" ");
 		else
 			printf("*");
 
-		if (curr_thread[cpu]) {
-			if (curr_thread[cpu]->pid)
-				printf("%2s ", curr_thread[cpu]->shortname);
+		if (sched->curr_thread[cpu]) {
+			if (sched->curr_thread[cpu]->pid)
+				printf("%2s ", sched->curr_thread[cpu]->shortname);
 			else
 				printf(".  ");
 		} else
@@ -1438,127 +1337,97 @@
 	} else {
 		printf("\n");
 	}
+
+	return 0;
 }
 
-static void
-process_sched_switch_event(struct perf_tool *tool __used,
-			   struct event_format *event,
-			   struct perf_sample *sample,
-			   struct machine *machine,
-			   struct thread *thread __used)
+static int process_sched_switch_event(struct perf_tool *tool,
+				      struct perf_evsel *evsel,
+				      struct perf_sample *sample,
+				      struct machine *machine)
 {
-	int this_cpu = sample->cpu;
-	void *data = sample->raw_data;
-	struct trace_switch_event switch_event;
+	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
+	int this_cpu = sample->cpu, err = 0;
+	u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"),
+	    next_pid = perf_evsel__intval(evsel, sample, "next_pid");
 
-	FILL_COMMON_FIELDS(switch_event, event, data);
-
-	FILL_ARRAY(switch_event, prev_comm, event, data);
-	FILL_FIELD(switch_event, prev_pid, event, data);
-	FILL_FIELD(switch_event, prev_prio, event, data);
-	FILL_FIELD(switch_event, prev_state, event, data);
-	FILL_ARRAY(switch_event, next_comm, event, data);
-	FILL_FIELD(switch_event, next_pid, event, data);
-	FILL_FIELD(switch_event, next_prio, event, data);
-
-	if (curr_pid[this_cpu] != (u32)-1) {
+	if (sched->curr_pid[this_cpu] != (u32)-1) {
 		/*
 		 * Are we trying to switch away a PID that is
 		 * not current?
 		 */
-		if (curr_pid[this_cpu] != switch_event.prev_pid)
-			nr_context_switch_bugs++;
+		if (sched->curr_pid[this_cpu] != prev_pid)
+			sched->nr_context_switch_bugs++;
 	}
-	if (trace_handler->switch_event)
-		trace_handler->switch_event(&switch_event, machine, event, sample);
 
-	curr_pid[this_cpu] = switch_event.next_pid;
+	if (sched->tp_handler->switch_event)
+		err = sched->tp_handler->switch_event(sched, evsel, sample, machine);
+
+	sched->curr_pid[this_cpu] = next_pid;
+	return err;
 }
 
-static void
-process_sched_runtime_event(struct perf_tool *tool __used,
-			    struct event_format *event,
-			    struct perf_sample *sample,
-			    struct machine *machine,
-			    struct thread *thread __used)
+static int process_sched_runtime_event(struct perf_tool *tool,
+				       struct perf_evsel *evsel,
+				       struct perf_sample *sample,
+				       struct machine *machine)
 {
-	void *data = sample->raw_data;
-	struct trace_runtime_event runtime_event;
+	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
 
-	FILL_ARRAY(runtime_event, comm, event, data);
-	FILL_FIELD(runtime_event, pid, event, data);
-	FILL_FIELD(runtime_event, runtime, event, data);
-	FILL_FIELD(runtime_event, vruntime, event, data);
+	if (sched->tp_handler->runtime_event)
+		return sched->tp_handler->runtime_event(sched, evsel, sample, machine);
 
-	if (trace_handler->runtime_event)
-		trace_handler->runtime_event(&runtime_event, machine, sample);
+	return 0;
 }
 
-static void
-process_sched_fork_event(struct perf_tool *tool __used,
-			 struct event_format *event,
-			 struct perf_sample *sample,
-			 struct machine *machine __used,
-			 struct thread *thread __used)
+static int process_sched_fork_event(struct perf_tool *tool,
+				    struct perf_evsel *evsel,
+				    struct perf_sample *sample,
+				    struct machine *machine __maybe_unused)
 {
-	void *data = sample->raw_data;
-	struct trace_fork_event fork_event;
+	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
 
-	FILL_COMMON_FIELDS(fork_event, event, data);
+	if (sched->tp_handler->fork_event)
+		return sched->tp_handler->fork_event(sched, evsel, sample);
 
-	FILL_ARRAY(fork_event, parent_comm, event, data);
-	FILL_FIELD(fork_event, parent_pid, event, data);
-	FILL_ARRAY(fork_event, child_comm, event, data);
-	FILL_FIELD(fork_event, child_pid, event, data);
-
-	if (trace_handler->fork_event)
-		trace_handler->fork_event(&fork_event, event);
+	return 0;
 }
 
-static void
-process_sched_exit_event(struct perf_tool *tool __used,
-			 struct event_format *event,
-			 struct perf_sample *sample __used,
-			 struct machine *machine __used,
-			 struct thread *thread __used)
+static int process_sched_exit_event(struct perf_tool *tool __maybe_unused,
+				    struct perf_evsel *evsel,
+				    struct perf_sample *sample __maybe_unused,
+				    struct machine *machine __maybe_unused)
 {
-	if (verbose)
-		printf("sched_exit event %p\n", event);
+	pr_debug("sched_exit event %p\n", evsel);
+	return 0;
 }
 
-static void
-process_sched_migrate_task_event(struct perf_tool *tool __used,
-				 struct event_format *event,
-				 struct perf_sample *sample,
-				 struct machine *machine,
-				 struct thread *thread __used)
+static int process_sched_migrate_task_event(struct perf_tool *tool,
+					    struct perf_evsel *evsel,
+					    struct perf_sample *sample,
+					    struct machine *machine)
 {
-	void *data = sample->raw_data;
-	struct trace_migrate_task_event migrate_task_event;
+	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
 
-	FILL_COMMON_FIELDS(migrate_task_event, event, data);
+	if (sched->tp_handler->migrate_task_event)
+		return sched->tp_handler->migrate_task_event(sched, evsel, sample, machine);
 
-	FILL_ARRAY(migrate_task_event, comm, event, data);
-	FILL_FIELD(migrate_task_event, pid, event, data);
-	FILL_FIELD(migrate_task_event, prio, event, data);
-	FILL_FIELD(migrate_task_event, cpu, event, data);
-
-	if (trace_handler->migrate_task_event)
-		trace_handler->migrate_task_event(&migrate_task_event, machine, sample);
+	return 0;
 }
 
-typedef void (*tracepoint_handler)(struct perf_tool *tool, struct event_format *event,
-				   struct perf_sample *sample,
-				   struct machine *machine,
-				   struct thread *thread);
+typedef int (*tracepoint_handler)(struct perf_tool *tool,
+				  struct perf_evsel *evsel,
+				  struct perf_sample *sample,
+				  struct machine *machine);
 
-static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __used,
-						 union perf_event *event __used,
+static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_unused,
+						 union perf_event *event __maybe_unused,
 						 struct perf_sample *sample,
 						 struct perf_evsel *evsel,
 						 struct machine *machine)
 {
 	struct thread *thread = machine__findnew_thread(machine, sample->pid);
+	int err = 0;
 
 	if (thread == NULL) {
 		pr_debug("problem processing %s event, skipping it.\n",
@@ -1571,23 +1440,15 @@
 
 	if (evsel->handler.func != NULL) {
 		tracepoint_handler f = evsel->handler.func;
-		f(tool, evsel->tp_format, sample, machine, thread);
+		err = f(tool, evsel, sample, machine);
 	}
 
-	return 0;
+	return err;
 }
 
-static struct perf_tool perf_sched = {
-	.sample		 = perf_sched__process_tracepoint_sample,
-	.comm		 = perf_event__process_comm,
-	.lost		 = perf_event__process_lost,
-	.fork		 = perf_event__process_task,
-	.ordered_samples = true,
-};
-
-static void read_events(bool destroy, struct perf_session **psession)
+static int perf_sched__read_events(struct perf_sched *sched, bool destroy,
+				   struct perf_session **psession)
 {
-	int err = -EINVAL;
 	const struct perf_evsel_str_handler handlers[] = {
 		{ "sched:sched_switch",	      process_sched_switch_event, },
 		{ "sched:sched_stat_runtime", process_sched_runtime_event, },
@@ -1599,21 +1460,25 @@
 	};
 	struct perf_session *session;
 
-	session = perf_session__new(input_name, O_RDONLY, 0, false, &perf_sched);
-	if (session == NULL)
-		die("No Memory");
+	session = perf_session__new(sched->input_name, O_RDONLY, 0, false, &sched->tool);
+	if (session == NULL) {
+		pr_debug("No Memory for session\n");
+		return -1;
+	}
 
-	err = perf_session__set_tracepoints_handlers(session, handlers);
-	assert(err == 0);
+	if (perf_session__set_tracepoints_handlers(session, handlers))
+		goto out_delete;
 
 	if (perf_session__has_traces(session, "record -R")) {
-		err = perf_session__process_events(session, &perf_sched);
-		if (err)
-			die("Failed to process events, error %d", err);
+		int err = perf_session__process_events(session, &sched->tool);
+		if (err) {
+			pr_err("Failed to process events, error %d", err);
+			goto out_delete;
+		}
 
-		nr_events      = session->hists.stats.nr_events[0];
-		nr_lost_events = session->hists.stats.total_lost;
-		nr_lost_chunks = session->hists.stats.nr_events[PERF_RECORD_LOST];
+		sched->nr_events      = session->hists.stats.nr_events[0];
+		sched->nr_lost_events = session->hists.stats.total_lost;
+		sched->nr_lost_chunks = session->hists.stats.nr_events[PERF_RECORD_LOST];
 	}
 
 	if (destroy)
@@ -1621,208 +1486,166 @@
 
 	if (psession)
 		*psession = session;
+
+	return 0;
+
+out_delete:
+	perf_session__delete(session);
+	return -1;
 }
 
-static void print_bad_events(void)
+static void print_bad_events(struct perf_sched *sched)
 {
-	if (nr_unordered_timestamps && nr_timestamps) {
+	if (sched->nr_unordered_timestamps && sched->nr_timestamps) {
 		printf("  INFO: %.3f%% unordered timestamps (%ld out of %ld)\n",
-			(double)nr_unordered_timestamps/(double)nr_timestamps*100.0,
-			nr_unordered_timestamps, nr_timestamps);
+			(double)sched->nr_unordered_timestamps/(double)sched->nr_timestamps*100.0,
+			sched->nr_unordered_timestamps, sched->nr_timestamps);
 	}
-	if (nr_lost_events && nr_events) {
+	if (sched->nr_lost_events && sched->nr_events) {
 		printf("  INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n",
-			(double)nr_lost_events/(double)nr_events*100.0,
-			nr_lost_events, nr_events, nr_lost_chunks);
+			(double)sched->nr_lost_events/(double)sched->nr_events * 100.0,
+			sched->nr_lost_events, sched->nr_events, sched->nr_lost_chunks);
 	}
-	if (nr_state_machine_bugs && nr_timestamps) {
+	if (sched->nr_state_machine_bugs && sched->nr_timestamps) {
 		printf("  INFO: %.3f%% state machine bugs (%ld out of %ld)",
-			(double)nr_state_machine_bugs/(double)nr_timestamps*100.0,
-			nr_state_machine_bugs, nr_timestamps);
-		if (nr_lost_events)
+			(double)sched->nr_state_machine_bugs/(double)sched->nr_timestamps*100.0,
+			sched->nr_state_machine_bugs, sched->nr_timestamps);
+		if (sched->nr_lost_events)
 			printf(" (due to lost events?)");
 		printf("\n");
 	}
-	if (nr_context_switch_bugs && nr_timestamps) {
+	if (sched->nr_context_switch_bugs && sched->nr_timestamps) {
 		printf("  INFO: %.3f%% context switch bugs (%ld out of %ld)",
-			(double)nr_context_switch_bugs/(double)nr_timestamps*100.0,
-			nr_context_switch_bugs, nr_timestamps);
-		if (nr_lost_events)
+			(double)sched->nr_context_switch_bugs/(double)sched->nr_timestamps*100.0,
+			sched->nr_context_switch_bugs, sched->nr_timestamps);
+		if (sched->nr_lost_events)
 			printf(" (due to lost events?)");
 		printf("\n");
 	}
 }
 
-static void __cmd_lat(void)
+static int perf_sched__lat(struct perf_sched *sched)
 {
 	struct rb_node *next;
 	struct perf_session *session;
 
 	setup_pager();
-	read_events(false, &session);
-	sort_lat();
+	if (perf_sched__read_events(sched, false, &session))
+		return -1;
+	perf_sched__sort_lat(sched);
 
 	printf("\n ---------------------------------------------------------------------------------------------------------------\n");
 	printf("  Task                  |   Runtime ms  | Switches | Average delay ms | Maximum delay ms | Maximum delay at     |\n");
 	printf(" ---------------------------------------------------------------------------------------------------------------\n");
 
-	next = rb_first(&sorted_atom_root);
+	next = rb_first(&sched->sorted_atom_root);
 
 	while (next) {
 		struct work_atoms *work_list;
 
 		work_list = rb_entry(next, struct work_atoms, node);
-		output_lat_thread(work_list);
+		output_lat_thread(sched, work_list);
 		next = rb_next(next);
 	}
 
 	printf(" -----------------------------------------------------------------------------------------\n");
 	printf("  TOTAL:                |%11.3f ms |%9" PRIu64 " |\n",
-		(double)all_runtime/1e6, all_count);
+		(double)sched->all_runtime / 1e6, sched->all_count);
 
 	printf(" ---------------------------------------------------\n");
 
-	print_bad_events();
+	print_bad_events(sched);
 	printf("\n");
 
 	perf_session__delete(session);
+	return 0;
 }
 
-static struct trace_sched_handler map_ops  = {
-	.wakeup_event		= NULL,
-	.switch_event		= map_switch_event,
-	.runtime_event		= NULL,
-	.fork_event		= NULL,
-};
-
-static void __cmd_map(void)
+static int perf_sched__map(struct perf_sched *sched)
 {
-	max_cpu = sysconf(_SC_NPROCESSORS_CONF);
+	sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF);
 
 	setup_pager();
-	read_events(true, NULL);
-	print_bad_events();
+	if (perf_sched__read_events(sched, true, NULL))
+		return -1;
+	print_bad_events(sched);
+	return 0;
 }
 
-static void __cmd_replay(void)
+static int perf_sched__replay(struct perf_sched *sched)
 {
 	unsigned long i;
 
-	calibrate_run_measurement_overhead();
-	calibrate_sleep_measurement_overhead();
+	calibrate_run_measurement_overhead(sched);
+	calibrate_sleep_measurement_overhead(sched);
 
-	test_calibrations();
+	test_calibrations(sched);
 
-	read_events(true, NULL);
+	if (perf_sched__read_events(sched, true, NULL))
+		return -1;
 
-	printf("nr_run_events:        %ld\n", nr_run_events);
-	printf("nr_sleep_events:      %ld\n", nr_sleep_events);
-	printf("nr_wakeup_events:     %ld\n", nr_wakeup_events);
+	printf("nr_run_events:        %ld\n", sched->nr_run_events);
+	printf("nr_sleep_events:      %ld\n", sched->nr_sleep_events);
+	printf("nr_wakeup_events:     %ld\n", sched->nr_wakeup_events);
 
-	if (targetless_wakeups)
-		printf("target-less wakeups:  %ld\n", targetless_wakeups);
-	if (multitarget_wakeups)
-		printf("multi-target wakeups: %ld\n", multitarget_wakeups);
-	if (nr_run_events_optimized)
+	if (sched->targetless_wakeups)
+		printf("target-less wakeups:  %ld\n", sched->targetless_wakeups);
+	if (sched->multitarget_wakeups)
+		printf("multi-target wakeups: %ld\n", sched->multitarget_wakeups);
+	if (sched->nr_run_events_optimized)
 		printf("run atoms optimized: %ld\n",
-			nr_run_events_optimized);
+			sched->nr_run_events_optimized);
 
-	print_task_traces();
-	add_cross_task_wakeups();
+	print_task_traces(sched);
+	add_cross_task_wakeups(sched);
 
-	create_tasks();
+	create_tasks(sched);
 	printf("------------------------------------------------------------\n");
-	for (i = 0; i < replay_repeat; i++)
-		run_one_test();
+	for (i = 0; i < sched->replay_repeat; i++)
+		run_one_test(sched);
+
+	return 0;
 }
 
-
-static const char * const sched_usage[] = {
-	"perf sched [<options>] {record|latency|map|replay|script}",
-	NULL
-};
-
-static const struct option sched_options[] = {
-	OPT_STRING('i', "input", &input_name, "file",
-		    "input file name"),
-	OPT_INCR('v', "verbose", &verbose,
-		    "be more verbose (show symbol address, etc)"),
-	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
-		    "dump raw trace in ASCII"),
-	OPT_END()
-};
-
-static const char * const latency_usage[] = {
-	"perf sched latency [<options>]",
-	NULL
-};
-
-static const struct option latency_options[] = {
-	OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
-		   "sort by key(s): runtime, switch, avg, max"),
-	OPT_INCR('v', "verbose", &verbose,
-		    "be more verbose (show symbol address, etc)"),
-	OPT_INTEGER('C', "CPU", &profile_cpu,
-		    "CPU to profile on"),
-	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
-		    "dump raw trace in ASCII"),
-	OPT_END()
-};
-
-static const char * const replay_usage[] = {
-	"perf sched replay [<options>]",
-	NULL
-};
-
-static const struct option replay_options[] = {
-	OPT_UINTEGER('r', "repeat", &replay_repeat,
-		     "repeat the workload replay N times (-1: infinite)"),
-	OPT_INCR('v', "verbose", &verbose,
-		    "be more verbose (show symbol address, etc)"),
-	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
-		    "dump raw trace in ASCII"),
-	OPT_END()
-};
-
-static void setup_sorting(void)
+static void setup_sorting(struct perf_sched *sched, const struct option *options,
+			  const char * const usage_msg[])
 {
-	char *tmp, *tok, *str = strdup(sort_order);
+	char *tmp, *tok, *str = strdup(sched->sort_order);
 
 	for (tok = strtok_r(str, ", ", &tmp);
 			tok; tok = strtok_r(NULL, ", ", &tmp)) {
-		if (sort_dimension__add(tok, &sort_list) < 0) {
+		if (sort_dimension__add(tok, &sched->sort_list) < 0) {
 			error("Unknown --sort key: `%s'", tok);
-			usage_with_options(latency_usage, latency_options);
+			usage_with_options(usage_msg, options);
 		}
 	}
 
 	free(str);
 
-	sort_dimension__add("pid", &cmp_pid);
+	sort_dimension__add("pid", &sched->cmp_pid);
 }
 
-static const char *record_args[] = {
-	"record",
-	"-a",
-	"-R",
-	"-f",
-	"-m", "1024",
-	"-c", "1",
-	"-e", "sched:sched_switch",
-	"-e", "sched:sched_stat_wait",
-	"-e", "sched:sched_stat_sleep",
-	"-e", "sched:sched_stat_iowait",
-	"-e", "sched:sched_stat_runtime",
-	"-e", "sched:sched_process_exit",
-	"-e", "sched:sched_process_fork",
-	"-e", "sched:sched_wakeup",
-	"-e", "sched:sched_migrate_task",
-};
-
 static int __cmd_record(int argc, const char **argv)
 {
 	unsigned int rec_argc, i, j;
 	const char **rec_argv;
+	const char * const record_args[] = {
+		"record",
+		"-a",
+		"-R",
+		"-f",
+		"-m", "1024",
+		"-c", "1",
+		"-e", "sched:sched_switch",
+		"-e", "sched:sched_stat_wait",
+		"-e", "sched:sched_stat_sleep",
+		"-e", "sched:sched_stat_iowait",
+		"-e", "sched:sched_stat_runtime",
+		"-e", "sched:sched_process_exit",
+		"-e", "sched:sched_process_fork",
+		"-e", "sched:sched_wakeup",
+		"-e", "sched:sched_migrate_task",
+	};
 
 	rec_argc = ARRAY_SIZE(record_args) + argc - 1;
 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
@@ -1841,8 +1664,85 @@
 	return cmd_record(i, rec_argv, NULL);
 }
 
-int cmd_sched(int argc, const char **argv, const char *prefix __used)
+int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
 {
+	const char default_sort_order[] = "avg, max, switch, runtime";
+	struct perf_sched sched = {
+		.tool = {
+			.sample		 = perf_sched__process_tracepoint_sample,
+			.comm		 = perf_event__process_comm,
+			.lost		 = perf_event__process_lost,
+			.fork		 = perf_event__process_task,
+			.ordered_samples = true,
+		},
+		.cmp_pid	      = LIST_HEAD_INIT(sched.cmp_pid),
+		.sort_list	      = LIST_HEAD_INIT(sched.sort_list),
+		.start_work_mutex     = PTHREAD_MUTEX_INITIALIZER,
+		.work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER,
+		.curr_pid	      = { [0 ... MAX_CPUS - 1] = -1 },
+		.sort_order	      = default_sort_order,
+		.replay_repeat	      = 10,
+		.profile_cpu	      = -1,
+		.next_shortname1      = 'A',
+		.next_shortname2      = '0',
+	};
+	const struct option latency_options[] = {
+	OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]",
+		   "sort by key(s): runtime, switch, avg, max"),
+	OPT_INCR('v', "verbose", &verbose,
+		    "be more verbose (show symbol address, etc)"),
+	OPT_INTEGER('C', "CPU", &sched.profile_cpu,
+		    "CPU to profile on"),
+	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
+		    "dump raw trace in ASCII"),
+	OPT_END()
+	};
+	const struct option replay_options[] = {
+	OPT_UINTEGER('r', "repeat", &sched.replay_repeat,
+		     "repeat the workload replay N times (-1: infinite)"),
+	OPT_INCR('v', "verbose", &verbose,
+		    "be more verbose (show symbol address, etc)"),
+	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
+		    "dump raw trace in ASCII"),
+	OPT_END()
+	};
+	const struct option sched_options[] = {
+	OPT_STRING('i', "input", &sched.input_name, "file",
+		    "input file name"),
+	OPT_INCR('v', "verbose", &verbose,
+		    "be more verbose (show symbol address, etc)"),
+	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
+		    "dump raw trace in ASCII"),
+	OPT_END()
+	};
+	const char * const latency_usage[] = {
+		"perf sched latency [<options>]",
+		NULL
+	};
+	const char * const replay_usage[] = {
+		"perf sched replay [<options>]",
+		NULL
+	};
+	const char * const sched_usage[] = {
+		"perf sched [<options>] {record|latency|map|replay|script}",
+		NULL
+	};
+	struct trace_sched_handler lat_ops  = {
+		.wakeup_event	    = latency_wakeup_event,
+		.switch_event	    = latency_switch_event,
+		.runtime_event	    = latency_runtime_event,
+		.fork_event	    = latency_fork_event,
+		.migrate_task_event = latency_migrate_task_event,
+	};
+	struct trace_sched_handler map_ops  = {
+		.switch_event	    = map_switch_event,
+	};
+	struct trace_sched_handler replay_ops  = {
+		.wakeup_event	    = replay_wakeup_event,
+		.switch_event	    = replay_switch_event,
+		.fork_event	    = replay_fork_event,
+	};
+
 	argc = parse_options(argc, argv, sched_options, sched_usage,
 			     PARSE_OPT_STOP_AT_NON_OPTION);
 	if (!argc)
@@ -1858,26 +1758,26 @@
 	if (!strncmp(argv[0], "rec", 3)) {
 		return __cmd_record(argc, argv);
 	} else if (!strncmp(argv[0], "lat", 3)) {
-		trace_handler = &lat_ops;
+		sched.tp_handler = &lat_ops;
 		if (argc > 1) {
 			argc = parse_options(argc, argv, latency_options, latency_usage, 0);
 			if (argc)
 				usage_with_options(latency_usage, latency_options);
 		}
-		setup_sorting();
-		__cmd_lat();
+		setup_sorting(&sched, latency_options, latency_usage);
+		return perf_sched__lat(&sched);
 	} else if (!strcmp(argv[0], "map")) {
-		trace_handler = &map_ops;
-		setup_sorting();
-		__cmd_map();
+		sched.tp_handler = &map_ops;
+		setup_sorting(&sched, latency_options, latency_usage);
+		return perf_sched__map(&sched);
 	} else if (!strncmp(argv[0], "rep", 3)) {
-		trace_handler = &replay_ops;
+		sched.tp_handler = &replay_ops;
 		if (argc) {
 			argc = parse_options(argc, argv, replay_options, replay_usage, 0);
 			if (argc)
 				usage_with_options(replay_usage, replay_options);
 		}
-		__cmd_replay();
+		return perf_sched__replay(&sched);
 	} else {
 		usage_with_options(sched_usage, sched_options);
 	}
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 2d6e3b2..1be843a 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -14,6 +14,7 @@
 #include "util/util.h"
 #include "util/evlist.h"
 #include "util/evsel.h"
+#include "util/sort.h"
 #include <linux/bitmap.h>
 
 static char const		*script_name;
@@ -430,9 +431,9 @@
 	printf("\n");
 }
 
-static int default_start_script(const char *script __unused,
-				int argc __unused,
-				const char **argv __unused)
+static int default_start_script(const char *script __maybe_unused,
+				int argc __maybe_unused,
+				const char **argv __maybe_unused)
 {
 	return 0;
 }
@@ -442,8 +443,8 @@
 	return 0;
 }
 
-static int default_generate_script(struct pevent *pevent __unused,
-				   const char *outfile __unused)
+static int default_generate_script(struct pevent *pevent __maybe_unused,
+				   const char *outfile __maybe_unused)
 {
 	return 0;
 }
@@ -474,7 +475,7 @@
 
 static const char *input_name;
 
-static int process_sample_event(struct perf_tool *tool __used,
+static int process_sample_event(struct perf_tool *tool __maybe_unused,
 				union perf_event *event,
 				struct perf_sample *sample,
 				struct perf_evsel *evsel,
@@ -534,7 +535,7 @@
 
 extern volatile int session_done;
 
-static void sig_handler(int sig __unused)
+static void sig_handler(int sig __maybe_unused)
 {
 	session_done = 1;
 }
@@ -644,8 +645,8 @@
 	fprintf(stderr, "\n");
 }
 
-static int parse_scriptname(const struct option *opt __used,
-			    const char *str, int unset __used)
+static int parse_scriptname(const struct option *opt __maybe_unused,
+			    const char *str, int unset __maybe_unused)
 {
 	char spec[PATH_MAX];
 	const char *script, *ext;
@@ -690,8 +691,8 @@
 	return 0;
 }
 
-static int parse_output_fields(const struct option *opt __used,
-			    const char *arg, int unset __used)
+static int parse_output_fields(const struct option *opt __maybe_unused,
+			    const char *arg, int unset __maybe_unused)
 {
 	char *tok;
 	int i, imax = sizeof(all_output_options) / sizeof(struct output_option);
@@ -982,8 +983,9 @@
 	return script_root;
 }
 
-static int list_available_scripts(const struct option *opt __used,
-				  const char *s __used, int unset __used)
+static int list_available_scripts(const struct option *opt __maybe_unused,
+				  const char *s __maybe_unused,
+				  int unset __maybe_unused)
 {
 	struct dirent *script_next, *lang_next, script_dirent, lang_dirent;
 	char scripts_path[MAXPATHLEN];
@@ -1030,6 +1032,61 @@
 	exit(0);
 }
 
+/*
+ * Return -1 if none is found, otherwise the number of scripts found.
+ *
+ * Currently the only user of this function is the script browser, which
+ * will list all statically runnable scripts, select one, execute it and
+ * show the output in a perf browser.
+ */
+int find_scripts(char **scripts_array, char **scripts_path_array)
+{
+	struct dirent *script_next, *lang_next, script_dirent, lang_dirent;
+	char scripts_path[MAXPATHLEN];
+	DIR *scripts_dir, *lang_dir;
+	char lang_path[MAXPATHLEN];
+	char *temp;
+	int i = 0;
+
+	snprintf(scripts_path, MAXPATHLEN, "%s/scripts", perf_exec_path());
+
+	scripts_dir = opendir(scripts_path);
+	if (!scripts_dir)
+		return -1;
+
+	for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) {
+		snprintf(lang_path, MAXPATHLEN, "%s/%s", scripts_path,
+			 lang_dirent.d_name);
+#ifdef NO_LIBPERL
+		if (strstr(lang_path, "perl"))
+			continue;
+#endif
+#ifdef NO_LIBPYTHON
+		if (strstr(lang_path, "python"))
+			continue;
+#endif
+
+		lang_dir = opendir(lang_path);
+		if (!lang_dir)
+			continue;
+
+		for_each_script(lang_path, lang_dir, script_dirent, script_next) {
+			/* Skip the real-time scripts: xxxtop.p[yl] */
+			if (strstr(script_dirent.d_name, "top."))
+				continue;
+			sprintf(scripts_path_array[i], "%s/%s", lang_path,
+				script_dirent.d_name);
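+			/* Copy the script name without its extension. */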
+			temp = strchr(script_dirent.d_name, '.');
+			snprintf(scripts_array[i],
+				(temp - script_dirent.d_name) + 1,
+				"%s", script_dirent.d_name);
+			i++;
+		}
+	}
+
+	return i;
+}
+
 static char *get_script_path(const char *script_root, const char *suffix)
 {
 	struct dirent *script_next, *lang_next, script_dirent, lang_dirent;
@@ -1142,6 +1199,8 @@
 		     parse_output_fields),
 	OPT_BOOLEAN('a', "all-cpus", &system_wide,
 		     "system-wide collection from all CPUs"),
+	OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
+		   "only consider these symbols"),
 	OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
 	OPT_STRING('c', "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
 		   "only display events for these comms"),
@@ -1153,21 +1212,26 @@
 	OPT_END()
 };
 
-static bool have_cmd(int argc, const char **argv)
+static int have_cmd(int argc, const char **argv)
 {
 	char **__argv = malloc(sizeof(const char *) * argc);
 
-	if (!__argv)
-		die("malloc");
+	if (!__argv) {
+		pr_err("malloc failed\n");
+		return -1;
+	}
+
 	memcpy(__argv, argv, sizeof(const char *) * argc);
 	argc = parse_options(argc, (const char **)__argv, record_options,
 			     NULL, PARSE_OPT_STOP_AT_NON_OPTION);
 	free(__argv);
 
-	return argc != 0;
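+	/* No workload command left after parsing => collect system-wide. */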
+	system_wide = (argc == 0);
+
+	return 0;
 }
 
-int cmd_script(int argc, const char **argv, const char *prefix __used)
+int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	char *rec_script_path = NULL;
 	char *rep_script_path = NULL;
@@ -1231,13 +1295,13 @@
 
 		if (pipe(live_pipe) < 0) {
 			perror("failed to create pipe");
-			exit(-1);
+			return -1;
 		}
 
 		pid = fork();
 		if (pid < 0) {
 			perror("failed to fork");
-			exit(-1);
+			return -1;
 		}
 
 		if (!pid) {
@@ -1249,13 +1313,18 @@
 			if (is_top_script(argv[0])) {
 				system_wide = true;
 			} else if (!system_wide) {
-				system_wide = !have_cmd(argc - rep_args,
-							&argv[rep_args]);
+				if (have_cmd(argc - rep_args, &argv[rep_args]) != 0) {
+					err = -1;
+					goto out;
+				}
 			}
 
 			__argv = malloc((argc + 6) * sizeof(const char *));
-			if (!__argv)
-				die("malloc");
+			if (!__argv) {
+				pr_err("malloc failed\n");
+				err = -ENOMEM;
+				goto out;
+			}
 
 			__argv[j++] = "/bin/sh";
 			__argv[j++] = rec_script_path;
@@ -1277,8 +1346,12 @@
 		close(live_pipe[1]);
 
 		__argv = malloc((argc + 4) * sizeof(const char *));
-		if (!__argv)
-			die("malloc");
+		if (!__argv) {
+			pr_err("malloc failed\n");
+			err = -ENOMEM;
+			goto out;
+		}
+
 		j = 0;
 		__argv[j++] = "/bin/sh";
 		__argv[j++] = rep_script_path;
@@ -1303,12 +1376,20 @@
 
 		if (!rec_script_path)
 			system_wide = false;
-		else if (!system_wide)
-			system_wide = !have_cmd(argc - 1, &argv[1]);
+		else if (!system_wide) {
+			if (have_cmd(argc - 1, &argv[1]) != 0) {
+				err = -1;
+				goto out;
+			}
+		}
 
 		__argv = malloc((argc + 2) * sizeof(const char *));
-		if (!__argv)
-			die("malloc");
+		if (!__argv) {
+			pr_err("malloc failed\n");
+			err = -ENOMEM;
+			goto out;
+		}
+
 		__argv[j++] = "/bin/sh";
 		__argv[j++] = script_path;
 		if (system_wide)
@@ -1357,18 +1438,18 @@
 		input = open(session->filename, O_RDONLY);	/* input_name */
 		if (input < 0) {
 			perror("failed to open file");
-			exit(-1);
+			return -1;
 		}
 
 		err = fstat(input, &perf_stat);
 		if (err < 0) {
 			perror("failed to stat file");
-			exit(-1);
+			return -1;
 		}
 
 		if (!perf_stat.st_size) {
 			fprintf(stderr, "zero-sized file, nothing to do!\n");
-			exit(0);
+			return 0;
 		}
 
 		scripting_ops = script_spec__lookup(generate_script_lang);
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index d53d8ab..e0f65fe 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -51,13 +51,13 @@
 #include "util/evsel.h"
 #include "util/debug.h"
 #include "util/color.h"
+#include "util/stat.h"
 #include "util/header.h"
 #include "util/cpumap.h"
 #include "util/thread.h"
 #include "util/thread_map.h"
 
 #include <sys/prctl.h>
-#include <math.h>
 #include <locale.h>
 
 #define DEFAULT_SEPARATOR	" "
@@ -199,11 +199,6 @@
 
 static volatile int done = 0;
 
-struct stats
-{
-	double n, mean, M2;
-};
-
 struct perf_stat {
 	struct stats	  res_stats[3];
 };
@@ -220,48 +215,14 @@
 	evsel->priv = NULL;
 }
 
-static void update_stats(struct stats *stats, u64 val)
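+/*
+ * Use the evsel's own cpu map, if it has one, unless the user asked for
+ * specific CPUs on the command line.
+ */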
+static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
 {
-	double delta;
-
-	stats->n++;
-	delta = val - stats->mean;
-	stats->mean += delta / stats->n;
-	stats->M2 += delta*(val - stats->mean);
+	return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus;
 }
 
-static double avg_stats(struct stats *stats)
+static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
 {
-	return stats->mean;
-}
-
-/*
- * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
- *
- *       (\Sum n_i^2) - ((\Sum n_i)^2)/n
- * s^2 = -------------------------------
- *                  n - 1
- *
- * http://en.wikipedia.org/wiki/Stddev
- *
- * The std dev of the mean is related to the std dev by:
- *
- *             s
- * s_mean = -------
- *          sqrt(n)
- *
- */
-static double stddev_stats(struct stats *stats)
-{
-	double variance, variance_mean;
-
-	if (!stats->n)
-		return 0.0;
-
-	variance = stats->M2 / (stats->n - 1);
-	variance_mean = variance / stats->n;
-
-	return sqrt(variance_mean);
+	return perf_evsel__cpus(evsel)->nr;
 }
 
 static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
@@ -295,7 +256,7 @@
 		evsel->attr.exclude_guest = evsel->attr.exclude_host = 0;
 
 	if (perf_target__has_cpu(&target)) {
-		ret = perf_evsel__open_per_cpu(evsel, evsel_list->cpus);
+		ret = perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));
 		if (ret)
 			goto check_ret;
 		return 0;
@@ -376,7 +337,7 @@
 	u64 *count = counter->counts->aggr.values;
 	int i;
 
-	if (__perf_evsel__read(counter, evsel_list->cpus->nr,
+	if (__perf_evsel__read(counter, perf_evsel__nr_cpus(counter),
 			       evsel_list->threads->nr, scale) < 0)
 		return -1;
 
@@ -405,7 +366,7 @@
 	u64 *count;
 	int cpu;
 
-	for (cpu = 0; cpu < evsel_list->cpus->nr; cpu++) {
+	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
 		if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
 			return -1;
 
@@ -417,7 +378,7 @@
 	return 0;
 }
 
-static int run_perf_stat(int argc __used, const char **argv)
+static int run_perf_stat(int argc __maybe_unused, const char **argv)
 {
 	unsigned long long t0, t1;
 	struct perf_evsel *counter, *first;
@@ -428,7 +389,7 @@
 
 	if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
 		perror("failed to create pipes");
-		exit(1);
+		return -1;
 	}
 
 	if (forks) {
@@ -510,7 +471,8 @@
 			}
 			if (child_pid != -1)
 				kill(child_pid, SIGTERM);
-			die("Not all events could be opened.\n");
+
+			pr_err("Not all events could be opened.\n");
 			return -1;
 		}
 		counter->supported = true;
@@ -543,12 +505,12 @@
 	if (no_aggr) {
 		list_for_each_entry(counter, &evsel_list->entries, node) {
 			read_counter(counter);
-			perf_evsel__close_fd(counter, evsel_list->cpus->nr, 1);
+			perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1);
 		}
 	} else {
 		list_for_each_entry(counter, &evsel_list->entries, node) {
 			read_counter_aggr(counter);
-			perf_evsel__close_fd(counter, evsel_list->cpus->nr,
+			perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
 					     evsel_list->threads->nr);
 		}
 	}
@@ -558,10 +520,7 @@
 
 static void print_noise_pct(double total, double avg)
 {
-	double pct = 0.0;
-
-	if (avg)
-		pct = 100.0*total/avg;
+	double pct = rel_stddev_stats(total, avg);
 
 	if (csv_output)
 		fprintf(output, "%s%.2f%%", csv_sep, pct);
@@ -589,7 +548,7 @@
 	if (no_aggr)
 		sprintf(cpustr, "CPU%*d%s",
 			csv_output ? 0 : -4,
-			evsel_list->cpus->map[cpu], csv_sep);
+			perf_evsel__cpus(evsel)->map[cpu], csv_sep);
 
 	fprintf(output, fmt, cpustr, msecs, csv_sep, perf_evsel__name(evsel));
 
@@ -633,7 +592,9 @@
 	return color;
 }
 
-static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel __used, double avg)
+static void print_stalled_cycles_frontend(int cpu,
+					  struct perf_evsel *evsel
+					  __maybe_unused, double avg)
 {
 	double total, ratio = 0.0;
 	const char *color;
@@ -650,7 +611,9 @@
 	fprintf(output, " frontend cycles idle   ");
 }
 
-static void print_stalled_cycles_backend(int cpu, struct perf_evsel *evsel __used, double avg)
+static void print_stalled_cycles_backend(int cpu,
+					 struct perf_evsel *evsel
+					 __maybe_unused, double avg)
 {
 	double total, ratio = 0.0;
 	const char *color;
@@ -667,7 +630,9 @@
 	fprintf(output, " backend  cycles idle   ");
 }
 
-static void print_branch_misses(int cpu, struct perf_evsel *evsel __used, double avg)
+static void print_branch_misses(int cpu,
+				struct perf_evsel *evsel __maybe_unused,
+				double avg)
 {
 	double total, ratio = 0.0;
 	const char *color;
@@ -684,7 +649,9 @@
 	fprintf(output, " of all branches        ");
 }
 
-static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
+static void print_l1_dcache_misses(int cpu,
+				   struct perf_evsel *evsel __maybe_unused,
+				   double avg)
 {
 	double total, ratio = 0.0;
 	const char *color;
@@ -701,7 +668,9 @@
 	fprintf(output, " of all L1-dcache hits  ");
 }
 
-static void print_l1_icache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
+static void print_l1_icache_misses(int cpu,
+				   struct perf_evsel *evsel __maybe_unused,
+				   double avg)
 {
 	double total, ratio = 0.0;
 	const char *color;
@@ -718,7 +687,9 @@
 	fprintf(output, " of all L1-icache hits  ");
 }
 
-static void print_dtlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
+static void print_dtlb_cache_misses(int cpu,
+				    struct perf_evsel *evsel __maybe_unused,
+				    double avg)
 {
 	double total, ratio = 0.0;
 	const char *color;
@@ -735,7 +706,9 @@
 	fprintf(output, " of all dTLB cache hits ");
 }
 
-static void print_itlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
+static void print_itlb_cache_misses(int cpu,
+				    struct perf_evsel *evsel __maybe_unused,
+				    double avg)
 {
 	double total, ratio = 0.0;
 	const char *color;
@@ -752,7 +725,9 @@
 	fprintf(output, " of all iTLB cache hits ");
 }
 
-static void print_ll_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
+static void print_ll_cache_misses(int cpu,
+				  struct perf_evsel *evsel __maybe_unused,
+				  double avg)
 {
 	double total, ratio = 0.0;
 	const char *color;
@@ -785,7 +760,7 @@
 	if (no_aggr)
 		sprintf(cpustr, "CPU%*d%s",
 			csv_output ? 0 : -4,
-			evsel_list->cpus->map[cpu], csv_sep);
+			perf_evsel__cpus(evsel)->map[cpu], csv_sep);
 	else
 		cpu = 0;
 
@@ -946,14 +921,14 @@
 	u64 ena, run, val;
 	int cpu;
 
-	for (cpu = 0; cpu < evsel_list->cpus->nr; cpu++) {
+	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
 		val = counter->counts->cpu[cpu].val;
 		ena = counter->counts->cpu[cpu].ena;
 		run = counter->counts->cpu[cpu].run;
 		if (run == 0 || ena == 0) {
 			fprintf(output, "CPU%*d%s%*s%s%*s",
 				csv_output ? 0 : -4,
-				evsel_list->cpus->map[cpu], csv_sep,
+				perf_evsel__cpus(counter)->map[cpu], csv_sep,
 				csv_output ? 0 : 18,
 				counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
 				csv_sep,
@@ -1058,8 +1033,8 @@
 	NULL
 };
 
-static int stat__set_big_num(const struct option *opt __used,
-			     const char *s __used, int unset)
+static int stat__set_big_num(const struct option *opt __maybe_unused,
+			     const char *s __maybe_unused, int unset)
 {
 	big_num_opt = unset ? 0 : 1;
 	return 0;
@@ -1153,7 +1128,7 @@
 	return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
 }
 
-int cmd_stat(int argc, const char **argv, const char *prefix __used)
+int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	struct perf_evsel *pos;
 	int status = -ENOMEM;
@@ -1189,7 +1164,7 @@
 		output = fopen(output_name, mode);
 		if (!output) {
 			perror("failed to create output file");
-			exit(-1);
+			return -1;
 		}
 		clock_gettime(CLOCK_REALTIME, &tm);
 		fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
@@ -1252,7 +1227,7 @@
 
 	list_for_each_entry(pos, &evsel_list->entries, node) {
 		if (perf_evsel__alloc_stat_priv(pos) < 0 ||
-		    perf_evsel__alloc_counts(pos, evsel_list->cpus->nr) < 0)
+		    perf_evsel__alloc_counts(pos, perf_evsel__nr_cpus(pos)) < 0)
 			goto out_free_fd;
 	}
 
diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c
index 381d5ab..4aed155 100644
--- a/tools/perf/builtin-test.c
+++ b/tools/perf/builtin-test.c
@@ -18,7 +18,8 @@
 
 #include <sys/mman.h>
 
-static int vmlinux_matches_kallsyms_filter(struct map *map __used, struct symbol *sym)
+static int vmlinux_matches_kallsyms_filter(struct map *map __maybe_unused,
+					   struct symbol *sym)
 {
 	bool *visited = symbol__priv(sym);
 	*visited = true;
@@ -996,7 +997,9 @@
 /*
  * If the RDPMC instruction faults then signal this back to the test parent task:
  */
-static void segfault_handler(int sig __used, siginfo_t *info __used, void *uc __used)
+static void segfault_handler(int sig __maybe_unused,
+			     siginfo_t *info __maybe_unused,
+			     void *uc __maybe_unused)
 {
 	exit(-1);
 }
@@ -1023,14 +1026,16 @@
 
 	fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
 	if (fd < 0) {
-		die("Error: sys_perf_event_open() syscall returned "
-		    "with %d (%s)\n", fd, strerror(errno));
+		pr_err("Error: sys_perf_event_open() syscall returned "
+		       "with %d (%s)\n", fd, strerror(errno));
+		return -1;
 	}
 
 	addr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0);
 	if (addr == (void *)(-1)) {
-		die("Error: mmap() syscall returned "
-		    "with (%s)\n", strerror(errno));
+		pr_err("Error: mmap() syscall returned with (%s)\n",
+		       strerror(errno));
+		goto out_close;
 	}
 
 	for (n = 0; n < 6; n++) {
@@ -1051,9 +1056,9 @@
 	}
 
 	munmap(addr, page_size);
-	close(fd);
-
 	pr_debug("   ");
+out_close:
+	close(fd);
 
 	if (!delta_sum)
 		return -1;
@@ -1092,6 +1097,116 @@
 	return perf_pmu__test();
 }
 
+static int perf_evsel__roundtrip_cache_name_test(void)
+{
+	char name[128];
+	int type, op, err = 0, ret = 0, i, idx;
+	struct perf_evsel *evsel;
+	struct perf_evlist *evlist = perf_evlist__new(NULL, NULL);
+
+	if (evlist == NULL)
+		return -ENOMEM;
+
+	for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
+		for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
+			/* skip invalid cache type */
+			if (!perf_evsel__is_cache_op_valid(type, op))
+				continue;
+
+			for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
+				__perf_evsel__hw_cache_type_op_res_name(type, op, i,
+									name, sizeof(name));
+				err = parse_events(evlist, name, 0);
+				if (err)
+					ret = err;
+			}
+		}
+	}
+
+	idx = 0;
+	evsel = perf_evlist__first(evlist);
+
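+	/*
+	 * Second pass: regenerate each cache event name and check that it
+	 * matches the name of the corresponding parsed evsel, in order.
+	 */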
+	for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
+		for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
+			/* skip invalid cache type */
+			if (!perf_evsel__is_cache_op_valid(type, op))
+				continue;
+
+			for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
+				__perf_evsel__hw_cache_type_op_res_name(type, op, i,
+									name, sizeof(name));
+				if (evsel->idx != idx)
+					continue;
+
+				++idx;
+
+				if (strcmp(perf_evsel__name(evsel), name)) {
+					pr_debug("%s != %s\n", perf_evsel__name(evsel), name);
+					ret = -1;
+				}
+
+				evsel = perf_evsel__next(evsel);
+			}
+		}
+	}
+
+	perf_evlist__delete(evlist);
+	return ret;
+}
+
+static int __perf_evsel__name_array_test(const char *names[], int nr_names)
+{
+	int i, err;
+	struct perf_evsel *evsel;
+	struct perf_evlist *evlist = perf_evlist__new(NULL, NULL);
+
+	if (evlist == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < nr_names; ++i) {
+		err = parse_events(evlist, names[i], 0);
+		if (err) {
+			pr_debug("failed to parse event '%s', err %d\n",
+				 names[i], err);
+			goto out_delete_evlist;
+		}
+	}
+
+	err = 0;
+	list_for_each_entry(evsel, &evlist->entries, node) {
+		if (strcmp(perf_evsel__name(evsel), names[evsel->idx])) {
+			--err;
+			pr_debug("%s != %s\n", perf_evsel__name(evsel), names[evsel->idx]);
+		}
+	}
+
+out_delete_evlist:
+	perf_evlist__delete(evlist);
+	return err;
+}
+
+#define perf_evsel__name_array_test(names) \
+	__perf_evsel__name_array_test(names, ARRAY_SIZE(names))
+
+static int perf_evsel__roundtrip_name_test(void)
+{
+	int err = 0, ret = 0;
+
+	err = perf_evsel__name_array_test(perf_evsel__hw_names);
+	if (err)
+		ret = err;
+
+	err = perf_evsel__name_array_test(perf_evsel__sw_names);
+	if (err)
+		ret = err;
+
+	err = perf_evsel__roundtrip_cache_name_test();
+	if (err)
+		ret = err;
+
+	return ret;
+}
+
 static struct test {
 	const char *desc;
 	int (*func)(void);
@@ -1135,6 +1250,10 @@
 		.func = dso__test_data,
 	},
 	{
+		.desc = "roundtrip evsel->name check",
+		.func = perf_evsel__roundtrip_name_test,
+	},
+	{
 		.func = NULL,
 	},
 };
@@ -1199,7 +1318,7 @@
 	return 0;
 }
 
-int cmd_test(int argc, const char **argv, const char *prefix __used)
+int cmd_test(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	const char * const test_usage[] = {
 	"perf test [<options>] [{list <test-name-fragment>|[<test-name-fragments>|<test-numbers>]}]",
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 3b75b2e..55a3a6c 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -275,28 +275,28 @@
 static u64 cpus_pstate_start_times[MAX_CPUS];
 static u64 cpus_pstate_state[MAX_CPUS];
 
-static int process_comm_event(struct perf_tool *tool __used,
+static int process_comm_event(struct perf_tool *tool __maybe_unused,
 			      union perf_event *event,
-			      struct perf_sample *sample __used,
-			      struct machine *machine __used)
+			      struct perf_sample *sample __maybe_unused,
+			      struct machine *machine __maybe_unused)
 {
 	pid_set_comm(event->comm.tid, event->comm.comm);
 	return 0;
 }
 
-static int process_fork_event(struct perf_tool *tool __used,
+static int process_fork_event(struct perf_tool *tool __maybe_unused,
 			      union perf_event *event,
-			      struct perf_sample *sample __used,
-			      struct machine *machine __used)
+			      struct perf_sample *sample __maybe_unused,
+			      struct machine *machine __maybe_unused)
 {
 	pid_fork(event->fork.pid, event->fork.ppid, event->fork.time);
 	return 0;
 }
 
-static int process_exit_event(struct perf_tool *tool __used,
+static int process_exit_event(struct perf_tool *tool __maybe_unused,
 			      union perf_event *event,
-			      struct perf_sample *sample __used,
-			      struct machine *machine __used)
+			      struct perf_sample *sample __maybe_unused,
+			      struct machine *machine __maybe_unused)
 {
 	pid_exit(event->fork.pid, event->fork.time);
 	return 0;
@@ -491,11 +491,11 @@
 }
 
 
-static int process_sample_event(struct perf_tool *tool __used,
-				union perf_event *event __used,
+static int process_sample_event(struct perf_tool *tool __maybe_unused,
+				union perf_event *event __maybe_unused,
 				struct perf_sample *sample,
 				struct perf_evsel *evsel,
-				struct machine *machine __used)
+				struct machine *machine __maybe_unused)
 {
 	struct trace_entry *te;
 
@@ -1081,7 +1081,8 @@
 }
 
 static int
-parse_process(const struct option *opt __used, const char *arg, int __used unset)
+parse_process(const struct option *opt __maybe_unused, const char *arg,
+	      int __maybe_unused unset)
 {
 	if (arg)
 		add_process_filter(arg);
@@ -1106,7 +1107,8 @@
 };
 
 
-int cmd_timechart(int argc, const char **argv, const char *prefix __used)
+int cmd_timechart(int argc, const char **argv,
+		  const char *prefix __maybe_unused)
 {
 	argc = parse_options(argc, argv, options, timechart_usage,
 			PARSE_OPT_STOP_AT_NON_OPTION);
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 0513aaa..5550754 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -95,7 +95,8 @@
 		top->print_entries -= 9;
 }
 
-static void perf_top__sig_winch(int sig __used, siginfo_t *info __used, void *arg)
+static void perf_top__sig_winch(int sig __maybe_unused,
+				siginfo_t *info __maybe_unused, void *arg)
 {
 	struct perf_top *top = arg;
 
@@ -663,7 +664,7 @@
 	NULL
 };
 
-static int symbol_filter(struct map *map __used, struct symbol *sym)
+static int symbol_filter(struct map *map __maybe_unused, struct symbol *sym)
 {
 	const char *name = sym->name;
 	int i;
@@ -1163,7 +1164,7 @@
 	NULL
 };
 
-int cmd_top(int argc, const char **argv, const char *prefix __used)
+int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	struct perf_evsel *pos;
 	int status;
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index b382bd5..3ea74ed 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -36,4 +36,5 @@
 extern int cmd_test(int argc, const char **argv, const char *prefix);
 extern int cmd_inject(int argc, const char **argv, const char *prefix);
 
+extern int find_scripts(char **scripts_array, char **scripts_path_array);
 #endif
diff --git a/tools/perf/config/feature-tests.mak b/tools/perf/config/feature-tests.mak
index 2f1156a..116690a 100644
--- a/tools/perf/config/feature-tests.mak
+++ b/tools/perf/config/feature-tests.mak
@@ -179,3 +179,17 @@
 }
 endef
 endif
+
+ifndef NO_BACKTRACE
+define SOURCE_BACKTRACE
+#include <execinfo.h>
+#include <stdio.h>
+
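+/* Probe: builds only if backtrace() and backtrace_symbols() are available. */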
+int main(void)
+{
+	backtrace(NULL, 0);
+	backtrace_symbols(NULL, 0);
+	return 0;
+}
+endef
+endif
diff --git a/tools/perf/perf-archive.sh b/tools/perf/perf-archive.sh
index 95b6f8b..e919306 100644
--- a/tools/perf/perf-archive.sh
+++ b/tools/perf/perf-archive.sh
@@ -24,7 +24,7 @@
 perf buildid-list -i $PERF_DATA --with-hits | grep -v "^$NOBUILDID " > $BUILDIDS
 if [ ! -s $BUILDIDS ] ; then
 	echo "perf archive: no build-ids found"
-	rm -f $BUILDIDS
+	rm $BUILDIDS || true
 	exit 1
 fi
 
@@ -39,8 +39,8 @@
 	echo ${filename#$PERF_BUILDID_LINKDIR} >> $MANIFEST
 done
 
-tar cfj $PERF_DATA.tar.bz2 -C $PERF_BUILDID_DIR -T $MANIFEST
-rm -f $MANIFEST $BUILDIDS
+tar cjf $PERF_DATA.tar.bz2 -C $PERF_BUILDID_DIR -T $MANIFEST
+rm $MANIFEST $BUILDIDS || true
 echo -e "Now please run:\n"
 echo -e "$ tar xvf $PERF_DATA.tar.bz2 -C ~/.debug\n"
 echo "wherever you need to run 'perf report' on."
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index e7840e5..fb8578c 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -14,6 +14,7 @@
 #include "util/run-command.h"
 #include "util/parse-events.h"
 #include "util/debugfs.h"
+#include <pthread.h>
 
 const char perf_usage_string[] =
 	"perf [--version] [--help] COMMAND [ARGS]";
diff --git a/tools/perf/scripts/python/bin/event_analyzing_sample-record b/tools/perf/scripts/python/bin/event_analyzing_sample-record
new file mode 100644
index 0000000..5ce652d
--- /dev/null
+++ b/tools/perf/scripts/python/bin/event_analyzing_sample-record
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+#
+# event_analyzing_sample.py can cover all types of perf samples, including
+# tracepoints, so there are no special record requirements; just record what
+# you want to analyze.
+#
+perf record $@
diff --git a/tools/perf/scripts/python/bin/event_analyzing_sample-report b/tools/perf/scripts/python/bin/event_analyzing_sample-report
new file mode 100644
index 0000000..0941fc9
--- /dev/null
+++ b/tools/perf/scripts/python/bin/event_analyzing_sample-report
@@ -0,0 +1,3 @@
+#!/bin/bash
+# description: analyze all perf samples
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/event_analyzing_sample.py
diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c
index 1818a53..4aeb7d5 100644
--- a/tools/perf/ui/browser.c
+++ b/tools/perf/ui/browser.c
@@ -269,7 +269,7 @@
 	return err ? 0 : -1;
 }
 
-void ui_browser__hide(struct ui_browser *browser __used)
+void ui_browser__hide(struct ui_browser *browser __maybe_unused)
 {
 	pthread_mutex_lock(&ui__lock);
 	ui_helpline__pop();
@@ -518,7 +518,7 @@
 
 
 static int ui_browser__color_config(const char *var, const char *value,
-				    void *data __used)
+				    void *data __maybe_unused)
 {
 	char *fg = NULL, *bg;
 	int i;
@@ -602,7 +602,8 @@
 	SLsmg_set_char_set(0);
 }
 
-void ui_browser__write_graph(struct ui_browser *browser __used, int graph)
+void ui_browser__write_graph(struct ui_browser *browser __maybe_unused,
+			     int graph)
 {
 	SLsmg_set_char_set(1);
 	SLsmg_write_char(graph);
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 67a2703..8f8cd2d 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -54,7 +54,8 @@
 	return (struct browser_disasm_line *)(dl + 1);
 }
 
-static bool disasm_line__filter(struct ui_browser *browser __used, void *entry)
+static bool disasm_line__filter(struct ui_browser *browser __maybe_unused,
+				void *entry)
 {
 	if (annotate_browser__opts.hide_src_code) {
 		struct disasm_line *dl = list_entry(entry, struct disasm_line, node);
@@ -928,7 +929,8 @@
 	return strcmp(name, cfg->name);
 }
 
-static int annotate__config(const char *var, const char *value, void *data __used)
+static int annotate__config(const char *var, const char *value,
+			    void *data __maybe_unused)
 {
 	struct annotate__config *cfg;
 	const char *name;
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 81bd8c2..a21f40b 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -28,6 +28,8 @@
 	bool		     has_symbols;
 };
 
+extern void hist_browser__init_hpp(void);
+
 static int hists__browser_title(struct hists *hists, char *bf, size_t size,
 				const char *ev_name);
 
@@ -563,14 +565,47 @@
 	return row - first_row;
 }
 
+#define HPP__COLOR_FN(_name, _field)					\
+static int hist_browser__hpp_color_ ## _name(struct perf_hpp *hpp,	\
+					     struct hist_entry *he)	\
+{									\
+	double percent = 100.0 * he->_field / hpp->total_period;	\
+	*(double *)hpp->ptr = percent;					\
+	return scnprintf(hpp->buf, hpp->size, "%6.2f%%", percent);	\
+}
+
+HPP__COLOR_FN(overhead, period)
+HPP__COLOR_FN(overhead_sys, period_sys)
+HPP__COLOR_FN(overhead_us, period_us)
+HPP__COLOR_FN(overhead_guest_sys, period_guest_sys)
+HPP__COLOR_FN(overhead_guest_us, period_guest_us)
+
+#undef HPP__COLOR_FN
+
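+/*
+ * Switch the overhead columns to the TUI color callbacks, which also hand
+ * the computed percentage back to the caller via hpp->ptr.
+ */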
+void hist_browser__init_hpp(void)
+{
+	perf_hpp__init(false, false);
+
+	perf_hpp__format[PERF_HPP__OVERHEAD].color =
+				hist_browser__hpp_color_overhead;
+	perf_hpp__format[PERF_HPP__OVERHEAD_SYS].color =
+				hist_browser__hpp_color_overhead_sys;
+	perf_hpp__format[PERF_HPP__OVERHEAD_US].color =
+				hist_browser__hpp_color_overhead_us;
+	perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_SYS].color =
+				hist_browser__hpp_color_overhead_guest_sys;
+	perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color =
+				hist_browser__hpp_color_overhead_guest_us;
+}
+
 static int hist_browser__show_entry(struct hist_browser *browser,
 				    struct hist_entry *entry,
 				    unsigned short row)
 {
 	char s[256];
 	double percent;
-	int printed = 0;
-	int width = browser->b.width - 6; /* The percentage */
+	int i, printed = 0;
+	int width = browser->b.width;
 	char folded_sign = ' ';
 	bool current_entry = ui_browser__is_current_entry(&browser->b, row);
 	off_t row_offset = entry->row_offset;
@@ -586,35 +621,50 @@
 	}
 
 	if (row_offset == 0) {
-		hist_entry__sort_snprintf(entry, s, sizeof(s), browser->hists);
-		percent = (entry->period * 100.0) / browser->hists->stats.total_period;
+		struct perf_hpp hpp = {
+			.buf		= s,
+			.size		= sizeof(s),
+			.total_period	= browser->hists->stats.total_period,
+		};
 
-		ui_browser__set_percent_color(&browser->b, percent, current_entry);
 		ui_browser__gotorc(&browser->b, row, 0);
-		if (symbol_conf.use_callchain) {
-			slsmg_printf("%c ", folded_sign);
-			width -= 2;
-		}
 
-		slsmg_printf(" %5.2f%%", percent);
+		for (i = 0; i < PERF_HPP__MAX_INDEX; i++) {
+			if (!perf_hpp__format[i].cond)
+				continue;
+
+			if (i) {
+				slsmg_printf("  ");
+				width -= 2;
+			}
+
+			if (perf_hpp__format[i].color) {
+				hpp.ptr = &percent;
+				/* It will set percent for us. See HPP__COLOR_FN above. */
+				width -= perf_hpp__format[i].color(&hpp, entry);
+
+				ui_browser__set_percent_color(&browser->b, percent, current_entry);
+
+				if (i == 0 && symbol_conf.use_callchain) {
+					slsmg_printf("%c ", folded_sign);
+					width -= 2;
+				}
+
+				slsmg_printf("%s", s);
+
+				if (!current_entry || !browser->b.navkeypressed)
+					ui_browser__set_color(&browser->b, HE_COLORSET_NORMAL);
+			} else {
+				width -= perf_hpp__format[i].entry(&hpp, entry);
+				slsmg_printf("%s", s);
+			}
+		}
 
 		/* The scroll bar isn't being used */
 		if (!browser->b.navkeypressed)
 			width += 1;
 
-		if (!current_entry || !browser->b.navkeypressed)
-			ui_browser__set_color(&browser->b, HE_COLORSET_NORMAL);
-
-		if (symbol_conf.show_nr_samples) {
-			slsmg_printf(" %11u", entry->nr_events);
-			width -= 12;
-		}
-
-		if (symbol_conf.show_total_period) {
-			slsmg_printf(" %12" PRIu64, entry->period);
-			width -= 13;
-		}
-
+		hist_entry__sort_snprintf(entry, s, sizeof(s), browser->hists);
 		slsmg_write_nstring(s, width);
 		++row;
 		++printed;
diff --git a/tools/perf/ui/gtk/browser.c b/tools/perf/ui/gtk/browser.c
index 26b5b65..7ff99ec 100644
--- a/tools/perf/ui/gtk/browser.c
+++ b/tools/perf/ui/gtk/browser.c
@@ -36,6 +36,57 @@
 	gtk_window_resize(GTK_WINDOW(window), width, height);
 }
 
+static const char *perf_gtk__get_percent_color(double percent)
+{
+	if (percent >= MIN_RED)
+		return "<span fgcolor='red'>";
+	if (percent >= MIN_GREEN)
+		return "<span fgcolor='dark green'>";
+	return NULL;
+}
+
+#define HPP__COLOR_FN(_name, _field)						\
+static int perf_gtk__hpp_color_ ## _name(struct perf_hpp *hpp,			\
+					 struct hist_entry *he)			\
+{										\
+	double percent = 100.0 * he->_field / hpp->total_period;		\
+	const char *markup;							\
+	int ret = 0;								\
+										\
+	markup = perf_gtk__get_percent_color(percent);				\
+	if (markup)								\
+		ret += scnprintf(hpp->buf, hpp->size, "%s", markup);		\
+	ret += scnprintf(hpp->buf + ret, hpp->size - ret, "%6.2f%%", percent); 	\
+	if (markup)								\
+		ret += scnprintf(hpp->buf + ret, hpp->size - ret, "</span>"); 	\
+										\
+	return ret;								\
+}
+
+HPP__COLOR_FN(overhead, period)
+HPP__COLOR_FN(overhead_sys, period_sys)
+HPP__COLOR_FN(overhead_us, period_us)
+HPP__COLOR_FN(overhead_guest_sys, period_guest_sys)
+HPP__COLOR_FN(overhead_guest_us, period_guest_us)
+
+#undef HPP__COLOR_FN
+
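+/* Install the GTK color callbacks, which wrap the percentage in Pango markup. */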
+void perf_gtk__init_hpp(void)
+{
+	perf_hpp__init(false, false);
+
+	perf_hpp__format[PERF_HPP__OVERHEAD].color =
+				perf_gtk__hpp_color_overhead;
+	perf_hpp__format[PERF_HPP__OVERHEAD_SYS].color =
+				perf_gtk__hpp_color_overhead_sys;
+	perf_hpp__format[PERF_HPP__OVERHEAD_US].color =
+				perf_gtk__hpp_color_overhead_us;
+	perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_SYS].color =
+				perf_gtk__hpp_color_overhead_guest_sys;
+	perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color =
+				perf_gtk__hpp_color_overhead_guest_us;
+}
+
 static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists)
 {
 	GType col_types[MAX_COLUMNS];
@@ -43,15 +94,25 @@
 	struct sort_entry *se;
 	GtkListStore *store;
 	struct rb_node *nd;
-	u64 total_period;
 	GtkWidget *view;
-	int col_idx;
+	int i, col_idx;
 	int nr_cols;
+	char s[512];
+
+	struct perf_hpp hpp = {
+		.buf		= s,
+		.size		= sizeof(s),
+		.total_period	= hists->stats.total_period,
+	};
 
 	nr_cols = 0;
 
-	/* The percentage column */
-	col_types[nr_cols++] = G_TYPE_STRING;
+	for (i = 0; i < PERF_HPP__MAX_INDEX; i++) {
+		if (!perf_hpp__format[i].cond)
+			continue;
+
+		col_types[nr_cols++] = G_TYPE_STRING;
+	}
 
 	list_for_each_entry(se, &hist_entry__sort_list, list) {
 		if (se->elide)
@@ -68,11 +129,17 @@
 
 	col_idx = 0;
 
-	/* The percentage column */
-	gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
-						    -1, "Overhead (%)",
-						    renderer, "text",
-						    col_idx++, NULL);
+	for (i = 0; i < PERF_HPP__MAX_INDEX; i++) {
+		if (!perf_hpp__format[i].cond)
+			continue;
+
+		perf_hpp__format[i].header(&hpp);
+
+		gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
+							    -1, s,
+							    renderer, "markup",
+							    col_idx++, NULL);
+	}
 
 	list_for_each_entry(se, &hist_entry__sort_list, list) {
 		if (se->elide)
@@ -88,13 +155,9 @@
 
 	g_object_unref(GTK_TREE_MODEL(store));
 
-	total_period = hists->stats.total_period;
-
 	for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
 		struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
 		GtkTreeIter iter;
-		double percent;
-		char s[512];
 
 		if (h->filtered)
 			continue;
@@ -103,11 +166,17 @@
 
 		col_idx = 0;
 
-		percent = (h->period * 100.0) / total_period;
+		for (i = 0; i < PERF_HPP__MAX_INDEX; i++) {
+			if (!perf_hpp__format[i].cond)
+				continue;
 
-		snprintf(s, ARRAY_SIZE(s), "%.2f", percent);
+			if (perf_hpp__format[i].color)
+				perf_hpp__format[i].color(&hpp, h);
+			else
+				perf_hpp__format[i].entry(&hpp, h);
 
-		gtk_list_store_set(store, &iter, col_idx++, s, -1);
+			gtk_list_store_set(store, &iter, col_idx++, s, -1);
+		}
 
 		list_for_each_entry(se, &hist_entry__sort_list, list) {
 			if (se->elide)
@@ -168,8 +237,9 @@
 
 int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist,
 				  const char *help,
-				  void (*timer) (void *arg)__used,
-				  void *arg __used, int delay_secs __used)
+				  void (*timer) (void *arg)__maybe_unused,
+				  void *arg __maybe_unused,
+				  int delay_secs __maybe_unused)
 {
 	struct perf_evsel *pos;
 	GtkWidget *vbox;
diff --git a/tools/perf/ui/gtk/gtk.h b/tools/perf/ui/gtk/gtk.h
index 793cb61..687af0b 100644
--- a/tools/perf/ui/gtk/gtk.h
+++ b/tools/perf/ui/gtk/gtk.h
@@ -30,6 +30,7 @@
 int perf_gtk__deactivate_context(struct perf_gtk_context **ctx);
 
 void perf_gtk__init_helpline(void);
+void perf_gtk__init_hpp(void);
 
 #ifndef HAVE_GTK_INFO_BAR
 static inline GtkWidget *perf_gtk__setup_info_bar(void)
diff --git a/tools/perf/ui/gtk/setup.c b/tools/perf/ui/gtk/setup.c
index ec1ee26..3c4c6ef 100644
--- a/tools/perf/ui/gtk/setup.c
+++ b/tools/perf/ui/gtk/setup.c
@@ -8,10 +8,11 @@
 {
 	perf_error__register(&perf_gtk_eops);
 	perf_gtk__init_helpline();
+	perf_gtk__init_hpp();
 	return gtk_init_check(NULL, NULL) ? 0 : -1;
 }
 
-void perf_gtk__exit(bool wait_for_ok __used)
+void perf_gtk__exit(bool wait_for_ok __maybe_unused)
 {
 	if (!perf_gtk__is_active_context(pgctx))
 		return;
diff --git a/tools/perf/ui/gtk/util.c b/tools/perf/ui/gtk/util.c
index b8efb96..8aada5b 100644
--- a/tools/perf/ui/gtk/util.c
+++ b/tools/perf/ui/gtk/util.c
@@ -117,8 +117,8 @@
  *        For now, just add stubs for NO_NEWT=1 build.
  */
 #ifdef NO_NEWT_SUPPORT
-void ui_progress__update(u64 curr __used, u64 total __used,
-			 const char *title __used)
+void ui_progress__update(u64 curr __maybe_unused, u64 total __maybe_unused,
+			 const char *title __maybe_unused)
 {
 }
 #endif
diff --git a/tools/perf/ui/helpline.c b/tools/perf/ui/helpline.c
index 78ba28a..a49bcf3 100644
--- a/tools/perf/ui/helpline.c
+++ b/tools/perf/ui/helpline.c
@@ -12,7 +12,7 @@
 {
 }
 
-static void nop_helpline__push(const char *msg __used)
+static void nop_helpline__push(const char *msg __maybe_unused)
 {
 }
 
diff --git a/tools/perf/ui/helpline.h b/tools/perf/ui/helpline.h
index a2487f9..2b667ee 100644
--- a/tools/perf/ui/helpline.h
+++ b/tools/perf/ui/helpline.h
@@ -24,8 +24,8 @@
 extern char ui_helpline__current[512];
 
 #ifdef NO_NEWT_SUPPORT
-static inline int ui_helpline__show_help(const char *format __used,
-					 va_list ap __used)
+static inline int ui_helpline__show_help(const char *format __maybe_unused,
+					 va_list ap __maybe_unused)
 {
 	return 0;
 }
@@ -35,8 +35,8 @@
 #endif /* NO_NEWT_SUPPORT */
 
 #ifdef NO_GTK2_SUPPORT
-static inline int perf_gtk__show_helpline(const char *format __used,
-					  va_list ap __used)
+static inline int perf_gtk__show_helpline(const char *format __maybe_unused,
+					  va_list ap __maybe_unused)
 {
 	return 0;
 }
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
new file mode 100644
index 0000000..e3f8cd4
--- /dev/null
+++ b/tools/perf/ui/hist.c
@@ -0,0 +1,390 @@
+#include <math.h>
+
+#include "../util/hist.h"
+#include "../util/util.h"
+#include "../util/sort.h"
+
+
+/* hist period print (hpp) functions */
+static int hpp__header_overhead(struct perf_hpp *hpp)
+{
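+	/* hpp->ptr carries the old hists when comparing, so label the column as baseline. */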
+	const char *fmt = hpp->ptr ? "Baseline" : "Overhead";
+
+	return scnprintf(hpp->buf, hpp->size, fmt);
+}
+
+static int hpp__width_overhead(struct perf_hpp *hpp __maybe_unused)
+{
+	return 8;
+}
+
+static int hpp__color_overhead(struct perf_hpp *hpp, struct hist_entry *he)
+{
+	double percent = 100.0 * he->period / hpp->total_period;
+
+	if (hpp->ptr) {
+		struct hists *old_hists = hpp->ptr;
+		u64 total_period = old_hists->stats.total_period;
+		u64 base_period = he->pair ? he->pair->period : 0;
+
+		if (total_period)
+			percent = 100.0 * base_period / total_period;
+		else
+			percent = 0.0;
+	}
+
+	return percent_color_snprintf(hpp->buf, hpp->size, " %6.2f%%", percent);
+}
+
+static int hpp__entry_overhead(struct perf_hpp *hpp, struct hist_entry *he)
+{
+	double percent = 100.0 * he->period / hpp->total_period;
+	const char *fmt = symbol_conf.field_sep ? "%.2f" : " %6.2f%%";
+
+	if (hpp->ptr) {
+		struct hists *old_hists = hpp->ptr;
+		u64 total_period = old_hists->stats.total_period;
+		u64 base_period = he->pair ? he->pair->period : 0;
+
+		if (total_period)
+			percent = 100.0 * base_period / total_period;
+		else
+			percent = 0.0;
+	}
+
+	return scnprintf(hpp->buf, hpp->size, fmt, percent);
+}
+
+static int hpp__header_overhead_sys(struct perf_hpp *hpp)
+{
+	const char *fmt = symbol_conf.field_sep ? "%s" : "%7s";
+
+	return scnprintf(hpp->buf, hpp->size, fmt, "sys");
+}
+
+static int hpp__width_overhead_sys(struct perf_hpp *hpp __maybe_unused)
+{
+	return 7;
+}
+
+static int hpp__color_overhead_sys(struct perf_hpp *hpp, struct hist_entry *he)
+{
+	double percent = 100.0 * he->period_sys / hpp->total_period;
+	return percent_color_snprintf(hpp->buf, hpp->size, "%6.2f%%", percent);
+}
+
+static int hpp__entry_overhead_sys(struct perf_hpp *hpp, struct hist_entry *he)
+{
+	double percent = 100.0 * he->period_sys / hpp->total_period;
+	const char *fmt = symbol_conf.field_sep ? "%.2f" : "%6.2f%%";
+
+	return scnprintf(hpp->buf, hpp->size, fmt, percent);
+}
+
+static int hpp__header_overhead_us(struct perf_hpp *hpp)
+{
+	const char *fmt = symbol_conf.field_sep ? "%s" : "%7s";
+
+	return scnprintf(hpp->buf, hpp->size, fmt, "user");
+}
+
+static int hpp__width_overhead_us(struct perf_hpp *hpp __maybe_unused)
+{
+	return 7;
+}
+
+static int hpp__color_overhead_us(struct perf_hpp *hpp, struct hist_entry *he)
+{
+	double percent = 100.0 * he->period_us / hpp->total_period;
+	return percent_color_snprintf(hpp->buf, hpp->size, "%6.2f%%", percent);
+}
+
+static int hpp__entry_overhead_us(struct perf_hpp *hpp, struct hist_entry *he)
+{
+	double percent = 100.0 * he->period_us / hpp->total_period;
+	const char *fmt = symbol_conf.field_sep ? "%.2f" : "%6.2f%%";
+
+	return scnprintf(hpp->buf, hpp->size, fmt, percent);
+}
+
+static int hpp__header_overhead_guest_sys(struct perf_hpp *hpp)
+{
+	return scnprintf(hpp->buf, hpp->size, "guest sys");
+}
+
+static int hpp__width_overhead_guest_sys(struct perf_hpp *hpp __maybe_unused)
+{
+	return 9;
+}
+
+static int hpp__color_overhead_guest_sys(struct perf_hpp *hpp,
+					 struct hist_entry *he)
+{
+	double percent = 100.0 * he->period_guest_sys / hpp->total_period;
+	return percent_color_snprintf(hpp->buf, hpp->size, " %6.2f%% ", percent);
+}
+
+static int hpp__entry_overhead_guest_sys(struct perf_hpp *hpp,
+					 struct hist_entry *he)
+{
+	double percent = 100.0 * he->period_guest_sys / hpp->total_period;
+	const char *fmt = symbol_conf.field_sep ? "%.2f" : " %6.2f%% ";
+
+	return scnprintf(hpp->buf, hpp->size, fmt, percent);
+}
+
+static int hpp__header_overhead_guest_us(struct perf_hpp *hpp)
+{
+	return scnprintf(hpp->buf, hpp->size, "guest usr");
+}
+
+static int hpp__width_overhead_guest_us(struct perf_hpp *hpp __maybe_unused)
+{
+	return 9;
+}
+
+static int hpp__color_overhead_guest_us(struct perf_hpp *hpp,
+					struct hist_entry *he)
+{
+	double percent = 100.0 * he->period_guest_us / hpp->total_period;
+	return percent_color_snprintf(hpp->buf, hpp->size, " %6.2f%% ", percent);
+}
+
+static int hpp__entry_overhead_guest_us(struct perf_hpp *hpp,
+					struct hist_entry *he)
+{
+	double percent = 100.0 * he->period_guest_us / hpp->total_period;
+	const char *fmt = symbol_conf.field_sep ? "%.2f" : " %6.2f%% ";
+
+	return scnprintf(hpp->buf, hpp->size, fmt, percent);
+}
+
+static int hpp__header_samples(struct perf_hpp *hpp)
+{
+	const char *fmt = symbol_conf.field_sep ? "%s" : "%11s";
+
+	return scnprintf(hpp->buf, hpp->size, fmt, "Samples");
+}
+
+static int hpp__width_samples(struct perf_hpp *hpp __maybe_unused)
+{
+	return 11;
+}
+
+static int hpp__entry_samples(struct perf_hpp *hpp, struct hist_entry *he)
+{
+	const char *fmt = symbol_conf.field_sep ? "%" PRIu64 : "%11" PRIu64;
+
+	return scnprintf(hpp->buf, hpp->size, fmt, he->nr_events);
+}
+
+static int hpp__header_period(struct perf_hpp *hpp)
+{
+	const char *fmt = symbol_conf.field_sep ? "%s" : "%12s";
+
+	return scnprintf(hpp->buf, hpp->size, fmt, "Period");
+}
+
+static int hpp__width_period(struct perf_hpp *hpp __maybe_unused)
+{
+	return 12;
+}
+
+static int hpp__entry_period(struct perf_hpp *hpp, struct hist_entry *he)
+{
+	const char *fmt = symbol_conf.field_sep ? "%" PRIu64 : "%12" PRIu64;
+
+	return scnprintf(hpp->buf, hpp->size, fmt, he->period);
+}
+
+static int hpp__header_delta(struct perf_hpp *hpp)
+{
+	const char *fmt = symbol_conf.field_sep ? "%s" : "%7s";
+
+	return scnprintf(hpp->buf, hpp->size, fmt, "Delta");
+}
+
+static int hpp__width_delta(struct perf_hpp *hpp __maybe_unused)
+{
+	return 7;
+}
+
+static int hpp__entry_delta(struct perf_hpp *hpp, struct hist_entry *he)
+{
+	struct hists *pair_hists = hpp->ptr;
+	u64 old_total, new_total;
+	double old_percent = 0, new_percent = 0;
+	double diff;
+	const char *fmt = symbol_conf.field_sep ? "%s" : "%7.7s";
+	char buf[32] = " ";
+
+	old_total = pair_hists->stats.total_period;
+	if (old_total > 0 && he->pair)
+		old_percent = 100.0 * he->pair->period / old_total;
+
+	new_total = hpp->total_period;
+	if (new_total > 0)
+		new_percent = 100.0 * he->period / new_total;
+
+	diff = new_percent - old_percent;
+	if (fabs(diff) >= 0.01)
+		scnprintf(buf, sizeof(buf), "%+4.2F%%", diff);
+
+	return scnprintf(hpp->buf, hpp->size, fmt, buf);
+}
+
+static int hpp__header_displ(struct perf_hpp *hpp)
+{
+	return scnprintf(hpp->buf, hpp->size, "Displ.");
+}
+
+static int hpp__width_displ(struct perf_hpp *hpp __maybe_unused)
+{
+	return 6;
+}
+
+static int hpp__entry_displ(struct perf_hpp *hpp,
+			    struct hist_entry *he __maybe_unused)
+{
+	const char *fmt = symbol_conf.field_sep ? "%s" : "%6.6s";
+	char buf[32] = " ";
+
+	if (hpp->displacement)
+		scnprintf(buf, sizeof(buf), "%+4ld", hpp->displacement);
+
+	return scnprintf(hpp->buf, hpp->size, fmt, buf);
+}
+
+#define HPP__COLOR_PRINT_FNS(_name)		\
+	.header	= hpp__header_ ## _name,		\
+	.width	= hpp__width_ ## _name,		\
+	.color	= hpp__color_ ## _name,		\
+	.entry	= hpp__entry_ ## _name
+
+#define HPP__PRINT_FNS(_name)			\
+	.header	= hpp__header_ ## _name,		\
+	.width	= hpp__width_ ## _name,		\
+	.entry	= hpp__entry_ ## _name
+
+struct perf_hpp_fmt perf_hpp__format[] = {
+	{ .cond = true,  HPP__COLOR_PRINT_FNS(overhead) },
+	{ .cond = false, HPP__COLOR_PRINT_FNS(overhead_sys) },
+	{ .cond = false, HPP__COLOR_PRINT_FNS(overhead_us) },
+	{ .cond = false, HPP__COLOR_PRINT_FNS(overhead_guest_sys) },
+	{ .cond = false, HPP__COLOR_PRINT_FNS(overhead_guest_us) },
+	{ .cond = false, HPP__PRINT_FNS(samples) },
+	{ .cond = false, HPP__PRINT_FNS(period) },
+	{ .cond = false, HPP__PRINT_FNS(delta) },
+	{ .cond = false, HPP__PRINT_FNS(displ) }
+};
+
+#undef HPP__COLOR_PRINT_FNS
+#undef HPP__PRINT_FNS
+
+void perf_hpp__init(bool need_pair, bool show_displacement)
+{
+	if (symbol_conf.show_cpu_utilization) {
+		perf_hpp__format[PERF_HPP__OVERHEAD_SYS].cond = true;
+		perf_hpp__format[PERF_HPP__OVERHEAD_US].cond = true;
+
+		if (perf_guest) {
+			perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_SYS].cond = true;
+			perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].cond = true;
+		}
+	}
+
+	if (symbol_conf.show_nr_samples)
+		perf_hpp__format[PERF_HPP__SAMPLES].cond = true;
+
+	if (symbol_conf.show_total_period)
+		perf_hpp__format[PERF_HPP__PERIOD].cond = true;
+
+	if (need_pair) {
+		perf_hpp__format[PERF_HPP__DELTA].cond = true;
+
+		if (show_displacement)
+			perf_hpp__format[PERF_HPP__DISPL].cond = true;
+	}
+}
+
+static inline void advance_hpp(struct perf_hpp *hpp, int inc)
+{
+	hpp->buf  += inc;
+	hpp->size -= inc;
+}
+
+int hist_entry__period_snprintf(struct perf_hpp *hpp, struct hist_entry *he,
+				bool color)
+{
+	const char *sep = symbol_conf.field_sep;
+	char *start = hpp->buf;
+	int i, ret;
+
+	if (symbol_conf.exclude_other && !he->parent)
+		return 0;
+
+	for (i = 0; i < PERF_HPP__MAX_INDEX; i++) {
+		if (!perf_hpp__format[i].cond)
+			continue;
+
+		if (!sep || i > 0) {
+			ret = scnprintf(hpp->buf, hpp->size, "%s", sep ?: "  ");
+			advance_hpp(hpp, ret);
+		}
+
+		if (color && perf_hpp__format[i].color)
+			ret = perf_hpp__format[i].color(hpp, he);
+		else
+			ret = perf_hpp__format[i].entry(hpp, he);
+
+		advance_hpp(hpp, ret);
+	}
+
+	return hpp->buf - start;
+}
+
+int hist_entry__sort_snprintf(struct hist_entry *he, char *s, size_t size,
+			      struct hists *hists)
+{
+	const char *sep = symbol_conf.field_sep;
+	struct sort_entry *se;
+	int ret = 0;
+
+	list_for_each_entry(se, &hist_entry__sort_list, list) {
+		if (se->elide)
+			continue;
+
+		ret += scnprintf(s + ret, size - ret, "%s", sep ?: "  ");
+		ret += se->se_snprintf(he, s + ret, size - ret,
+				       hists__col_len(hists, se->se_width_idx));
+	}
+
+	return ret;
+}
+
+/*
+ * See hists__fprintf to match the column widths
+ */
+unsigned int hists__sort_list_width(struct hists *hists)
+{
+	struct sort_entry *se;
+	int i, ret = 0;
+
+	for (i = 0; i < PERF_HPP__MAX_INDEX; i++) {
+		if (!perf_hpp__format[i].cond)
+			continue;
+		if (i)
+			ret += 2;
+
+		ret += perf_hpp__format[i].width(NULL);
+	}
+
+	list_for_each_entry(se, &hist_entry__sort_list, list)
+		if (!se->elide)
+			ret += 2 + hists__col_len(hists, se->se_width_idx);
+
+	if (verbose) /* Addr + origin */
+		ret += 3 + BITS_PER_LONG / 4;
+
+	return ret;
+}
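
The table-driven output above is the heart of this change: each perf_hpp_fmt entry supplies header/width/entry (and optionally color) callbacks, and only entries with .cond set contribute a column, so hist_entry__period_snprintf() and the header code just walk one array. A stand-alone sketch of the same pattern in plain C follows; struct col, its values and the exact spacing are illustrative stand-ins, not perf code.

#include <stdbool.h>
#include <stdio.h>

struct col {				/* stand-in for struct perf_hpp_fmt */
	bool cond;			/* column enabled? */
	const char *header;
	int width;
	double value;			/* what ->entry() would print */
};

int main(void)
{
	struct col cols[] = {
		{ .cond = true,  .header = "Overhead", .width = 8, .value = 42.10 },
		{ .cond = false, .header = "sys",      .width = 7, .value =  3.00 },
		{ .cond = true,  .header = "user",     .width = 7, .value = 12.50 },
	};
	unsigned int i;

	/* header line: only enabled columns, two spaces between them */
	for (i = 0; i < sizeof(cols) / sizeof(cols[0]); i++) {
		if (!cols[i].cond)
			continue;
		printf("%s%*s", i ? "  " : "", cols[i].width, cols[i].header);
	}
	putchar('\n');

	/* one row, printed with the same per-column widths */
	for (i = 0; i < sizeof(cols) / sizeof(cols[0]); i++) {
		if (!cols[i].cond)
			continue;
		printf("%s%*.2f%%", i ? "  " : "", cols[i].width - 1, cols[i].value);
	}
	putchar('\n');
	return 0;
}

Because the column set lives in one place, the stdio and TUI front ends can share the same definitions instead of each open-coding the if (show_cpu_utilization)/if (show_nr_samples) chains removed further down.
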
diff --git a/tools/perf/ui/setup.c b/tools/perf/ui/setup.c
index c7820e5..bd7d460 100644
--- a/tools/perf/ui/setup.c
+++ b/tools/perf/ui/setup.c
@@ -1,8 +1,8 @@
 #include <pthread.h>
 
-#include "../cache.h"
-#include "../debug.h"
-
+#include "../util/cache.h"
+#include "../util/debug.h"
+#include "../util/hist.h"
 
 pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER;
 
@@ -29,6 +29,8 @@
 		use_browser = 0;
 		if (fallback_to_pager)
 			setup_pager();
+
+		perf_hpp__init(false, false);
 		break;
 	}
 }
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index 9bf7e9e..882461a 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -1,5 +1,4 @@
 #include <stdio.h>
-#include <math.h>
 
 #include "../../util/util.h"
 #include "../../util/hist.h"
@@ -291,138 +290,6 @@
 	return 0;
 }
 
-static int hist_entry__period_snprintf(struct hist_entry *he, char *s,
-				     size_t size, struct hists *pair_hists,
-				     bool show_displacement, long displacement,
-				     bool color, u64 total_period)
-{
-	u64 period, total, period_sys, period_us, period_guest_sys, period_guest_us;
-	u64 nr_events;
-	const char *sep = symbol_conf.field_sep;
-	int ret;
-
-	if (symbol_conf.exclude_other && !he->parent)
-		return 0;
-
-	if (pair_hists) {
-		period = he->pair ? he->pair->period : 0;
-		nr_events = he->pair ? he->pair->nr_events : 0;
-		total = pair_hists->stats.total_period;
-		period_sys = he->pair ? he->pair->period_sys : 0;
-		period_us = he->pair ? he->pair->period_us : 0;
-		period_guest_sys = he->pair ? he->pair->period_guest_sys : 0;
-		period_guest_us = he->pair ? he->pair->period_guest_us : 0;
-	} else {
-		period = he->period;
-		nr_events = he->nr_events;
-		total = total_period;
-		period_sys = he->period_sys;
-		period_us = he->period_us;
-		period_guest_sys = he->period_guest_sys;
-		period_guest_us = he->period_guest_us;
-	}
-
-	if (total) {
-		if (color)
-			ret = percent_color_snprintf(s, size,
-						     sep ? "%.2f" : "   %6.2f%%",
-						     (period * 100.0) / total);
-		else
-			ret = scnprintf(s, size, sep ? "%.2f" : "   %6.2f%%",
-				       (period * 100.0) / total);
-		if (symbol_conf.show_cpu_utilization) {
-			ret += percent_color_snprintf(s + ret, size - ret,
-					sep ? "%.2f" : "   %6.2f%%",
-					(period_sys * 100.0) / total);
-			ret += percent_color_snprintf(s + ret, size - ret,
-					sep ? "%.2f" : "   %6.2f%%",
-					(period_us * 100.0) / total);
-			if (perf_guest) {
-				ret += percent_color_snprintf(s + ret,
-						size - ret,
-						sep ? "%.2f" : "   %6.2f%%",
-						(period_guest_sys * 100.0) /
-								total);
-				ret += percent_color_snprintf(s + ret,
-						size - ret,
-						sep ? "%.2f" : "   %6.2f%%",
-						(period_guest_us * 100.0) /
-								total);
-			}
-		}
-	} else
-		ret = scnprintf(s, size, sep ? "%" PRIu64 : "%12" PRIu64 " ", period);
-
-	if (symbol_conf.show_nr_samples) {
-		if (sep)
-			ret += scnprintf(s + ret, size - ret, "%c%" PRIu64, *sep, nr_events);
-		else
-			ret += scnprintf(s + ret, size - ret, "%11" PRIu64, nr_events);
-	}
-
-	if (symbol_conf.show_total_period) {
-		if (sep)
-			ret += scnprintf(s + ret, size - ret, "%c%" PRIu64, *sep, period);
-		else
-			ret += scnprintf(s + ret, size - ret, " %12" PRIu64, period);
-	}
-
-	if (pair_hists) {
-		char bf[32];
-		double old_percent = 0, new_percent = 0, diff;
-
-		if (total > 0)
-			old_percent = (period * 100.0) / total;
-		if (total_period > 0)
-			new_percent = (he->period * 100.0) / total_period;
-
-		diff = new_percent - old_percent;
-
-		if (fabs(diff) >= 0.01)
-			scnprintf(bf, sizeof(bf), "%+4.2F%%", diff);
-		else
-			scnprintf(bf, sizeof(bf), " ");
-
-		if (sep)
-			ret += scnprintf(s + ret, size - ret, "%c%s", *sep, bf);
-		else
-			ret += scnprintf(s + ret, size - ret, "%11.11s", bf);
-
-		if (show_displacement) {
-			if (displacement)
-				scnprintf(bf, sizeof(bf), "%+4ld", displacement);
-			else
-				scnprintf(bf, sizeof(bf), " ");
-
-			if (sep)
-				ret += scnprintf(s + ret, size - ret, "%c%s", *sep, bf);
-			else
-				ret += scnprintf(s + ret, size - ret, "%6.6s", bf);
-		}
-	}
-
-	return ret;
-}
-
-int hist_entry__sort_snprintf(struct hist_entry *he, char *s, size_t size,
-			      struct hists *hists)
-{
-	const char *sep = symbol_conf.field_sep;
-	struct sort_entry *se;
-	int ret = 0;
-
-	list_for_each_entry(se, &hist_entry__sort_list, list) {
-		if (se->elide)
-			continue;
-
-		ret += scnprintf(s + ret, size - ret, "%s", sep ?: "  ");
-		ret += se->se_snprintf(he, s + ret, size - ret,
-				       hists__col_len(hists, se->se_width_idx));
-	}
-
-	return ret;
-}
-
 static size_t hist_entry__callchain_fprintf(struct hist_entry *he,
 					    struct hists *hists,
 					    u64 total_period, FILE *fp)
@@ -441,18 +308,23 @@
 
 static int hist_entry__fprintf(struct hist_entry *he, size_t size,
 			       struct hists *hists, struct hists *pair_hists,
-			       bool show_displacement, long displacement,
-			       u64 total_period, FILE *fp)
+			       long displacement, u64 total_period, FILE *fp)
 {
 	char bf[512];
 	int ret;
+	struct perf_hpp hpp = {
+		.buf		= bf,
+		.size		= size,
+		.total_period	= total_period,
+		.displacement	= displacement,
+		.ptr		= pair_hists,
+	};
+	bool color = !symbol_conf.field_sep;
 
 	if (size == 0 || size > sizeof(bf))
-		size = sizeof(bf);
+		size = hpp.size = sizeof(bf);
 
-	ret = hist_entry__period_snprintf(he, bf, size, pair_hists,
-					  show_displacement, displacement,
-					  true, total_period);
+	ret = hist_entry__period_snprintf(&hpp, he, color);
 	hist_entry__sort_snprintf(he, bf + ret, size - ret, hists);
 
 	ret = fprintf(fp, "%s\n", bf);
@@ -477,59 +349,29 @@
 	unsigned int width;
 	const char *sep = symbol_conf.field_sep;
 	const char *col_width = symbol_conf.col_width_list_str;
-	int nr_rows = 0;
+	int idx, nr_rows = 0;
+	char bf[64];
+	struct perf_hpp dummy_hpp = {
+		.buf	= bf,
+		.size	= sizeof(bf),
+		.ptr	= pair,
+	};
 
 	init_rem_hits();
 
 	if (!show_header)
 		goto print_entries;
 
-	fprintf(fp, "# %s", pair ? "Baseline" : "Overhead");
+	fprintf(fp, "# ");
+	for (idx = 0; idx < PERF_HPP__MAX_INDEX; idx++) {
+		if (!perf_hpp__format[idx].cond)
+			continue;
 
-	if (symbol_conf.show_cpu_utilization) {
-		if (sep) {
-			ret += fprintf(fp, "%csys", *sep);
-			ret += fprintf(fp, "%cus", *sep);
-			if (perf_guest) {
-				ret += fprintf(fp, "%cguest sys", *sep);
-				ret += fprintf(fp, "%cguest us", *sep);
-			}
-		} else {
-			ret += fprintf(fp, "     sys  ");
-			ret += fprintf(fp, "      us  ");
-			if (perf_guest) {
-				ret += fprintf(fp, "  guest sys  ");
-				ret += fprintf(fp, "  guest us  ");
-			}
-		}
-	}
+		if (idx)
+			fprintf(fp, "%s", sep ?: "  ");
 
-	if (symbol_conf.show_nr_samples) {
-		if (sep)
-			fprintf(fp, "%cSamples", *sep);
-		else
-			fputs("  Samples  ", fp);
-	}
-
-	if (symbol_conf.show_total_period) {
-		if (sep)
-			ret += fprintf(fp, "%cPeriod", *sep);
-		else
-			ret += fprintf(fp, "   Period    ");
-	}
-
-	if (pair) {
-		if (sep)
-			ret += fprintf(fp, "%cDelta", *sep);
-		else
-			ret += fprintf(fp, "  Delta    ");
-
-		if (show_displacement) {
-			if (sep)
-				ret += fprintf(fp, "%cDisplacement", *sep);
-			else
-				ret += fprintf(fp, " Displ");
-		}
+		perf_hpp__format[idx].header(&dummy_hpp);
+		fprintf(fp, "%s", bf);
 	}
 
 	list_for_each_entry(se, &hist_entry__sort_list, list) {
@@ -561,18 +403,21 @@
 	if (sep)
 		goto print_entries;
 
-	fprintf(fp, "# ........");
-	if (symbol_conf.show_cpu_utilization)
-		fprintf(fp, "   .......   .......");
-	if (symbol_conf.show_nr_samples)
-		fprintf(fp, " ..........");
-	if (symbol_conf.show_total_period)
-		fprintf(fp, " ............");
-	if (pair) {
-		fprintf(fp, " ..........");
-		if (show_displacement)
-			fprintf(fp, " .....");
+	fprintf(fp, "# ");
+	for (idx = 0; idx < PERF_HPP__MAX_INDEX; idx++) {
+		unsigned int i;
+
+		if (!perf_hpp__format[idx].cond)
+			continue;
+
+		if (idx)
+			fprintf(fp, "%s", sep ?: "  ");
+
+		width = perf_hpp__format[idx].width(&dummy_hpp);
+		for (i = 0; i < width; i++)
+			fprintf(fp, ".");
 	}
+
 	list_for_each_entry(se, &hist_entry__sort_list, list) {
 		unsigned int i;
 
@@ -612,8 +457,8 @@
 				displacement = 0;
 			++position;
 		}
-		ret += hist_entry__fprintf(h, max_cols, hists, pair, show_displacement,
-					   displacement, total_period, fp);
+		ret += hist_entry__fprintf(h, max_cols, hists, pair, displacement,
+					   total_period, fp);
 
 		if (max_rows && ++nr_rows >= max_rows)
 			goto out;
diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c
index 4c936e0..60debb8 100644
--- a/tools/perf/ui/tui/setup.c
+++ b/tools/perf/ui/tui/setup.c
@@ -15,6 +15,8 @@
 
 extern struct perf_error_ops perf_tui_eops;
 
+extern void hist_browser__init_hpp(void);
+
 void ui__refresh_dimensions(bool force)
 {
 	if (force || ui__need_resize) {
@@ -26,7 +28,7 @@
 	}
 }
 
-static void ui__sigwinch(int sig __used)
+static void ui__sigwinch(int sig __maybe_unused)
 {
 	ui__need_resize = 1;
 }
@@ -86,7 +88,7 @@
 	return SLkp_getkey();
 }
 
-static void newt_suspend(void *d __used)
+static void newt_suspend(void *d __maybe_unused)
 {
 	newtSuspend();
 	raise(SIGTSTP);
@@ -124,6 +126,8 @@
 	signal(SIGTERM, ui__signal);
 
 	perf_error__register(&perf_tui_eops);
+
+	hist_browser__init_hpp();
 out:
 	return err;
 }
diff --git a/tools/perf/util/alias.c b/tools/perf/util/alias.c
index b8144e8..e6d1347 100644
--- a/tools/perf/util/alias.c
+++ b/tools/perf/util/alias.c
@@ -3,7 +3,8 @@
 static const char *alias_key;
 static char *alias_val;
 
-static int alias_lookup_cb(const char *k, const char *v, void *cb __used)
+static int alias_lookup_cb(const char *k, const char *v,
+			   void *cb __maybe_unused)
 {
 	if (!prefixcmp(k, "alias.") && !strcmp(k+6, alias_key)) {
 		if (!v)
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 3a282c0..f0a9103 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -17,6 +17,7 @@
 #include <pthread.h>
 
 const char 	*disassembler_style;
+const char	*objdump_path;
 
 static struct ins *ins__find(const char *name);
 static int disasm_line__parse(char *line, char **namep, char **rawp);
@@ -312,8 +313,8 @@
 	.scnprintf = dec__scnprintf,
 };
 
-static int nop__scnprintf(struct ins *ins __used, char *bf, size_t size,
-			  struct ins_operands *ops __used)
+static int nop__scnprintf(struct ins *ins __maybe_unused, char *bf, size_t size,
+			  struct ins_operands *ops __maybe_unused)
 {
 	return scnprintf(bf, size, "%-6.6s", "nop");
 }
@@ -415,7 +416,7 @@
 	return bsearch(name, instructions, nmemb, sizeof(struct ins), ins__cmp);
 }
 
-int symbol__annotate_init(struct map *map __used, struct symbol *sym)
+int symbol__annotate_init(struct map *map __maybe_unused, struct symbol *sym)
 {
 	struct annotation *notes = symbol__annotation(sym);
 	pthread_mutex_init(&notes->lock, NULL);
@@ -820,9 +821,10 @@
 		 dso, dso->long_name, sym, sym->name);
 
 	snprintf(command, sizeof(command),
-		 "objdump %s%s --start-address=0x%016" PRIx64
+		 "%s %s%s --start-address=0x%016" PRIx64
 		 " --stop-address=0x%016" PRIx64
 		 " -d %s %s -C %s|grep -v %s|expand",
+		 objdump_path ? objdump_path : "objdump",
 		 disassembler_style ? "-M " : "",
 		 disassembler_style ? disassembler_style : "",
 		 map__rip_2objdump(map, sym->start),
@@ -982,7 +984,8 @@
 			    int context)
 {
 	struct dso *dso = map->dso;
-	const char *filename = dso->long_name, *d_filename;
+	char *filename;
+	const char *d_filename;
 	struct annotation *notes = symbol__annotation(sym);
 	struct disasm_line *pos, *queue = NULL;
 	u64 start = map__rip_2objdump(map, sym->start);
@@ -990,6 +993,10 @@
 	int more = 0;
 	u64 len;
 
+	filename = strdup(dso->long_name);
+	if (!filename)
+		return -ENOMEM;
+
 	if (full_paths)
 		d_filename = filename;
 	else
@@ -1040,6 +1047,8 @@
 		}
 	}
 
+	free(filename);
+
 	return more;
 }
 
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 78a5692..9b5b21e 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -7,6 +7,7 @@
 #include "symbol.h"
 #include <linux/list.h>
 #include <linux/rbtree.h>
+#include <pthread.h>
 
 struct ins;
 
@@ -125,7 +126,7 @@
 void symbol__annotate_zero_histograms(struct symbol *sym);
 
 int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize);
-int symbol__annotate_init(struct map *map __used, struct symbol *sym);
+int symbol__annotate_init(struct map *map __maybe_unused, struct symbol *sym);
 int symbol__annotate_printf(struct symbol *sym, struct map *map, int evidx,
 			    bool full_paths, int min_pcnt, int max_lines,
 			    int context);
@@ -138,11 +139,12 @@
 			 int max_lines);
 
 #ifdef NO_NEWT_SUPPORT
-static inline int symbol__tui_annotate(struct symbol *sym __used,
-				       struct map *map __used,
-				       int evidx __used,
-				       void(*timer)(void *arg) __used,
-				       void *arg __used, int delay_secs __used)
+static inline int symbol__tui_annotate(struct symbol *sym __maybe_unused,
+				       struct map *map __maybe_unused,
+				       int evidx __maybe_unused,
+				       void(*timer)(void *arg) __maybe_unused,
+				       void *arg __maybe_unused,
+				       int delay_secs __maybe_unused)
 {
 	return 0;
 }
@@ -152,5 +154,6 @@
 #endif
 
 extern const char	*disassembler_style;
+extern const char	*objdump_path;
 
 #endif	/* __PERF_ANNOTATE_H */
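
With objdump_path exported here, the disassembly command in annotate.c is built from a configurable binary and falls back to plain "objdump" when the pointer is NULL. A minimal stand-alone sketch of that command construction; the addresses, the "att" style string and the buffer size are made up for illustration, and how objdump_path actually gets set is outside this hunk.

#include <stdio.h>

int main(void)
{
	const char *objdump_path = NULL;	/* or e.g. a cross objdump */
	const char *disassembler_style = "att";	/* may also be NULL */
	char command[256];

	snprintf(command, sizeof(command),
		 "%s %s%s --start-address=0x%016llx --stop-address=0x%016llx -d",
		 objdump_path ? objdump_path : "objdump",
		 disassembler_style ? "-M " : "",
		 disassembler_style ? disassembler_style : "",
		 0x400100ULL, 0x400180ULL);

	puts(command);
	return 0;
}
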
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index fd9a594..8e3a740 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -16,10 +16,10 @@
 #include "session.h"
 #include "tool.h"
 
-static int build_id__mark_dso_hit(struct perf_tool *tool __used,
+static int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused,
 				  union perf_event *event,
-				  struct perf_sample *sample __used,
-				  struct perf_evsel *evsel __used,
+				  struct perf_sample *sample __maybe_unused,
+				  struct perf_evsel *evsel __maybe_unused,
 				  struct machine *machine)
 {
 	struct addr_location al;
@@ -41,9 +41,10 @@
 	return 0;
 }
 
-static int perf_event__exit_del_thread(struct perf_tool *tool __used,
+static int perf_event__exit_del_thread(struct perf_tool *tool __maybe_unused,
 				       union perf_event *event,
-				       struct perf_sample *sample __used,
+				       struct perf_sample *sample
+				       __maybe_unused,
 				       struct machine *machine)
 {
 	struct thread *thread = machine__findnew_thread(machine, event->fork.tid);
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
index cff18c6..ab17694 100644
--- a/tools/perf/util/cache.h
+++ b/tools/perf/util/cache.h
@@ -39,7 +39,7 @@
 	if (fallback_to_pager)
 		setup_pager();
 }
-static inline void exit_browser(bool wait_for_ok __used) {}
+static inline void exit_browser(bool wait_for_ok __maybe_unused) {}
 #else
 void setup_browser(bool fallback_to_pager);
 void exit_browser(bool wait_for_ok);
@@ -49,7 +49,7 @@
 {
 	return -1;
 }
-static inline void ui__exit(bool wait_for_ok __used) {}
+static inline void ui__exit(bool wait_for_ok __maybe_unused) {}
 #else
 int ui__init(void);
 void ui__exit(bool wait_for_ok);
@@ -60,7 +60,7 @@
 {
 	return -1;
 }
-static inline void perf_gtk__exit(bool wait_for_ok __used) {}
+static inline void perf_gtk__exit(bool wait_for_ok __maybe_unused) {}
 #else
 int perf_gtk__init(void);
 void perf_gtk__exit(bool wait_for_ok);
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 3a6bff4..d3b3f5d 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -93,7 +93,7 @@
  */
 static void
 sort_chain_flat(struct rb_root *rb_root, struct callchain_root *root,
-		u64 min_hit, struct callchain_param *param __used)
+		u64 min_hit, struct callchain_param *param __maybe_unused)
 {
 	__sort_chain_flat(rb_root, &root->node, min_hit);
 }
@@ -115,7 +115,7 @@
 
 static void
 sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_root *chain_root,
-		     u64 min_hit, struct callchain_param *param __used)
+		     u64 min_hit, struct callchain_param *param __maybe_unused)
 {
 	__sort_chain_graph_abs(&chain_root->node, min_hit);
 	rb_root->rb_node = chain_root->node.rb_root.rb_node;
@@ -140,7 +140,7 @@
 
 static void
 sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_root *chain_root,
-		     u64 min_hit __used, struct callchain_param *param)
+		     u64 min_hit __maybe_unused, struct callchain_param *param)
 {
 	__sort_chain_graph_rel(&chain_root->node, param->min_percent / 100.0);
 	rb_root->rb_node = chain_root->node.rb_root.rb_node;
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index dbe2f16..96bbda1 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -138,8 +138,8 @@
 	}
 }
 
-int parse_cgroups(const struct option *opt __used, const char *str,
-		  int unset __used)
+int parse_cgroups(const struct option *opt __maybe_unused, const char *str,
+		  int unset __maybe_unused)
 {
 	struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
 	const char *p, *e, *eos = str + strlen(str);
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 6faa3a1..3e0fdd3 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -342,13 +342,15 @@
 	return value;
 }
 
-static int perf_default_core_config(const char *var __used, const char *value __used)
+static int perf_default_core_config(const char *var __maybe_unused,
+				    const char *value __maybe_unused)
 {
 	/* Add other config variables here. */
 	return 0;
 }
 
-int perf_default_config(const char *var, const char *value, void *dummy __used)
+int perf_default_config(const char *var, const char *value,
+			void *dummy __maybe_unused)
 {
 	if (!prefixcmp(var, "core."))
 		return perf_default_core_config(var, value);
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index adc72f0..2b32ffa 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -38,24 +38,19 @@
 	return cpus;
 }
 
-static struct cpu_map *cpu_map__read_all_cpu_map(void)
+struct cpu_map *cpu_map__read(FILE *file)
 {
 	struct cpu_map *cpus = NULL;
-	FILE *onlnf;
 	int nr_cpus = 0;
 	int *tmp_cpus = NULL, *tmp;
 	int max_entries = 0;
 	int n, cpu, prev;
 	char sep;
 
-	onlnf = fopen("/sys/devices/system/cpu/online", "r");
-	if (!onlnf)
-		return cpu_map__default_new();
-
 	sep = 0;
 	prev = -1;
 	for (;;) {
-		n = fscanf(onlnf, "%u%c", &cpu, &sep);
+		n = fscanf(file, "%u%c", &cpu, &sep);
 		if (n <= 0)
 			break;
 		if (prev >= 0) {
@@ -95,6 +90,19 @@
 		cpus = cpu_map__default_new();
 out_free_tmp:
 	free(tmp_cpus);
+	return cpus;
+}
+
+static struct cpu_map *cpu_map__read_all_cpu_map(void)
+{
+	struct cpu_map *cpus = NULL;
+	FILE *onlnf;
+
+	onlnf = fopen("/sys/devices/system/cpu/online", "r");
+	if (!onlnf)
+		return cpu_map__default_new();
+
+	cpus = cpu_map__read(onlnf);
 	fclose(onlnf);
 	return cpus;
 }
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index c415185..17b5264 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -11,7 +11,7 @@
 struct cpu_map *cpu_map__new(const char *cpu_list);
 struct cpu_map *cpu_map__dummy_new(void);
 void cpu_map__delete(struct cpu_map *map);
-
+struct cpu_map *cpu_map__read(FILE *file);
 size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp);
 
 #endif /* __PERF_CPUMAP_H */
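
Factoring the parsing loop out into cpu_map__read(FILE *) means a CPU list can now come from any stream, not only /sys/devices/system/cpu/online. A stand-alone sketch of the same range-expanding parse, fed from an in-memory string via fmemopen() (POSIX.1-2008/glibc); the "0-3,7" input is fabricated for illustration.

#define _GNU_SOURCE
#include <stdio.h>

int main(void)
{
	char list[] = "0-3,7\n";
	FILE *f = fmemopen(list, sizeof(list) - 1, "r");
	int n, cpu, prev = -1;
	char sep = 0;

	while (f && (n = fscanf(f, "%d%c", &cpu, &sep)) > 0) {
		if (prev >= 0) {
			int c;

			for (c = prev; c <= cpu; c++)
				printf("cpu %d\n", c);	/* expand "prev-cpu" */
		} else if (n == 1 || sep != '-') {
			printf("cpu %d\n", cpu);	/* single entry */
		}
		prev = (n == 2 && sep == '-') ? cpu : -1;
	}
	if (f)
		fclose(f);
	return 0;
}
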
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index 05e660c..bb2e7d1 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -16,19 +16,20 @@
 struct perf_error_ops;
 
 #if defined(NO_NEWT_SUPPORT) && defined(NO_GTK2_SUPPORT)
-static inline void ui_progress__update(u64 curr __used, u64 total __used,
-				       const char *title __used) {}
+static inline void ui_progress__update(u64 curr __maybe_unused,
+				       u64 total __maybe_unused,
+				       const char *title __maybe_unused) {}
 
 #define ui__error(format, arg...) ui__warning(format, ##arg)
 
 static inline int
-perf_error__register(struct perf_error_ops *eops __used)
+perf_error__register(struct perf_error_ops *eops __maybe_unused)
 {
 	return 0;
 }
 
 static inline int
-perf_error__unregister(struct perf_error_ops *eops __used)
+perf_error__unregister(struct perf_error_ops *eops __maybe_unused)
 {
 	return 0;
 }
diff --git a/tools/perf/util/dso-test-data.c b/tools/perf/util/dso-test-data.c
index 541cdc7..c6caede 100644
--- a/tools/perf/util/dso-test-data.c
+++ b/tools/perf/util/dso-test-data.c
@@ -23,7 +23,7 @@
 	int fd, i;
 	unsigned char *buf;
 
-	fd = mkostemp(templ, O_CREAT|O_WRONLY|O_TRUNC);
+	fd = mkstemp(templ);
 
 	buf = malloc(size);
 	if (!buf) {
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 3a0f1a5..6715b19 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -112,7 +112,7 @@
 	event->comm.header.type = PERF_RECORD_COMM;
 
 	size = strlen(event->comm.comm) + 1;
-	size = ALIGN(size, sizeof(u64));
+	size = PERF_ALIGN(size, sizeof(u64));
 	memset(event->comm.comm + size, 0, machine->id_hdr_size);
 	event->comm.header.size = (sizeof(event->comm) -
 				(sizeof(event->comm.comm) - size) +
@@ -120,7 +120,9 @@
 	if (!full) {
 		event->comm.tid = pid;
 
-		process(tool, event, &synth_sample, machine);
+		if (process(tool, event, &synth_sample, machine) != 0)
+			return -1;
+
 		goto out;
 	}
 
@@ -143,7 +145,7 @@
 					 sizeof(event->comm.comm));
 
 		size = strlen(event->comm.comm) + 1;
-		size = ALIGN(size, sizeof(u64));
+		size = PERF_ALIGN(size, sizeof(u64));
 		memset(event->comm.comm + size, 0, machine->id_hdr_size);
 		event->comm.header.size = (sizeof(event->comm) -
 					  (sizeof(event->comm.comm) - size) +
@@ -151,7 +153,10 @@
 
 		event->comm.tid = pid;
 
-		process(tool, event, &synth_sample, machine);
+		if (process(tool, event, &synth_sample, machine) != 0) {
+			tgid = -1;
+			break;
+		}
 	}
 
 	closedir(tasks);
@@ -167,6 +172,7 @@
 {
 	char filename[PATH_MAX];
 	FILE *fp;
+	int rc = 0;
 
 	snprintf(filename, sizeof(filename), "/proc/%d/maps", pid);
 
@@ -222,7 +228,7 @@
 			size = strlen(execname);
 			execname[size - 1] = '\0'; /* Remove \n */
 			memcpy(event->mmap.filename, execname, size);
-			size = ALIGN(size, sizeof(u64));
+			size = PERF_ALIGN(size, sizeof(u64));
 			event->mmap.len -= event->mmap.start;
 			event->mmap.header.size = (sizeof(event->mmap) -
 					        (sizeof(event->mmap.filename) - size));
@@ -231,18 +237,22 @@
 			event->mmap.pid = tgid;
 			event->mmap.tid = pid;
 
-			process(tool, event, &synth_sample, machine);
+			if (process(tool, event, &synth_sample, machine) != 0) {
+				rc = -1;
+				break;
+			}
 		}
 	}
 
 	fclose(fp);
-	return 0;
+	return rc;
 }
 
 int perf_event__synthesize_modules(struct perf_tool *tool,
 				   perf_event__handler_t process,
 				   struct machine *machine)
 {
+	int rc = 0;
 	struct rb_node *nd;
 	struct map_groups *kmaps = &machine->kmaps;
 	union perf_event *event = zalloc((sizeof(event->mmap) +
@@ -272,7 +282,7 @@
 		if (pos->dso->kernel)
 			continue;
 
-		size = ALIGN(pos->dso->long_name_len + 1, sizeof(u64));
+		size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64));
 		event->mmap.header.type = PERF_RECORD_MMAP;
 		event->mmap.header.size = (sizeof(event->mmap) -
 				        (sizeof(event->mmap.filename) - size));
@@ -284,11 +294,14 @@
 
 		memcpy(event->mmap.filename, pos->dso->long_name,
 		       pos->dso->long_name_len + 1);
-		process(tool, event, &synth_sample, machine);
+		if (process(tool, event, &synth_sample, machine) != 0) {
+			rc = -1;
+			break;
+		}
 	}
 
 	free(event);
-	return 0;
+	return rc;
 }
 
 static int __event__synthesize_thread(union perf_event *comm_event,
@@ -392,12 +405,16 @@
 		if (*end) /* only interested in proper numerical dirents */
 			continue;
 
-		__event__synthesize_thread(comm_event, mmap_event, pid, 1,
-					   process, tool, machine);
+		if (__event__synthesize_thread(comm_event, mmap_event, pid, 1,
+					   process, tool, machine) != 0) {
+			err = -1;
+			goto out_closedir;
+		}
 	}
 
-	closedir(proc);
 	err = 0;
+out_closedir:
+	closedir(proc);
 out_free_mmap:
 	free(mmap_event);
 out_free_comm:
@@ -477,7 +494,7 @@
 	map = machine->vmlinux_maps[MAP__FUNCTION];
 	size = snprintf(event->mmap.filename, sizeof(event->mmap.filename),
 			"%s%s", mmap_name, symbol_name) + 1;
-	size = ALIGN(size, sizeof(u64));
+	size = PERF_ALIGN(size, sizeof(u64));
 	event->mmap.header.type = PERF_RECORD_MMAP;
 	event->mmap.header.size = (sizeof(event->mmap) -
 			(sizeof(event->mmap.filename) - size) + machine->id_hdr_size);
@@ -497,9 +514,9 @@
 	return fprintf(fp, ": %s:%d\n", event->comm.comm, event->comm.tid);
 }
 
-int perf_event__process_comm(struct perf_tool *tool __used,
+int perf_event__process_comm(struct perf_tool *tool __maybe_unused,
 			     union perf_event *event,
-			     struct perf_sample *sample __used,
+			     struct perf_sample *sample __maybe_unused,
 			     struct machine *machine)
 {
 	struct thread *thread = machine__findnew_thread(machine, event->comm.tid);
@@ -515,10 +532,10 @@
 	return 0;
 }
 
-int perf_event__process_lost(struct perf_tool *tool __used,
+int perf_event__process_lost(struct perf_tool *tool __maybe_unused,
 			     union perf_event *event,
-			     struct perf_sample *sample __used,
-			     struct machine *machine __used)
+			     struct perf_sample *sample __maybe_unused,
+			     struct machine *machine __maybe_unused)
 {
 	dump_printf(": id:%" PRIu64 ": lost:%" PRIu64 "\n",
 		    event->lost.id, event->lost.lost);
@@ -538,7 +555,8 @@
 		maps[MAP__FUNCTION]->end = ~0ULL;
 }
 
-static int perf_event__process_kernel_mmap(struct perf_tool *tool __used,
+static int perf_event__process_kernel_mmap(struct perf_tool *tool
+					   __maybe_unused,
 					   union perf_event *event,
 					   struct machine *machine)
 {
@@ -640,7 +658,7 @@
 
 int perf_event__process_mmap(struct perf_tool *tool,
 			     union perf_event *event,
-			     struct perf_sample *sample __used,
+			     struct perf_sample *sample __maybe_unused,
 			     struct machine *machine)
 {
 	struct thread *thread;
@@ -684,9 +702,9 @@
 		       event->fork.ppid, event->fork.ptid);
 }
 
-int perf_event__process_task(struct perf_tool *tool __used,
+int perf_event__process_task(struct perf_tool *tool __maybe_unused,
 			     union perf_event *event,
-			     struct perf_sample *sample __used,
+			     struct perf_sample *sample __maybe_unused,
 			      struct machine *machine)
 {
 	struct thread *thread = machine__findnew_thread(machine, event->fork.tid);
@@ -886,8 +904,9 @@
 		al->sym = map__find_symbol(al->map, al->addr, filter);
 	}
 
-	if (symbol_conf.sym_list && al->sym &&
-	    !strlist__has_entry(symbol_conf.sym_list, al->sym->name))
+	if (symbol_conf.sym_list &&
+		(!al->sym || !strlist__has_entry(symbol_conf.sym_list,
+						al->sym->name)))
 		goto out_filtered;
 
 	return 0;
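
The ALIGN() macro is renamed to PERF_ALIGN() throughout this file (event sizes, filename padding); the arithmetic is unchanged: round x up to the next multiple of a power-of-two a. A quick stand-alone check of the macro, with the definition reproduced from the kernel.h hunk at the end of this series:

#include <stdio.h>

#define __PERF_ALIGN_MASK(x, mask)	(((x) + (mask)) & ~(mask))
#define PERF_ALIGN(x, a)		__PERF_ALIGN_MASK(x, (typeof(x))(a) - 1)

int main(void)
{
	printf("%zu\n", PERF_ALIGN((size_t)5, sizeof(long long)));	/* 8 */
	printf("%zu\n", PERF_ALIGN((size_t)16, sizeof(long long)));	/* 16 */
	return 0;
}

The same macro also sizes build_id_event::build_id in event.h just below.
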
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 0e088d0..21b99e7 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -101,7 +101,7 @@
 struct build_id_event {
 	struct perf_event_header header;
 	pid_t			 pid;
-	u8			 build_id[ALIGN(BUILD_ID_SIZE, sizeof(u64))];
+	u8			 build_id[PERF_ALIGN(BUILD_ID_SIZE, sizeof(u64))];
 	char			 filename[];
 };
 
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 4774ac1..8923537 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -889,3 +889,16 @@
 	struct perf_evsel *evsel = perf_evlist__first(evlist);
 	return perf_evsel__parse_sample(evsel, event, sample, swapped);
 }
+
+size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp)
+{
+	struct perf_evsel *evsel;
+	size_t printed = 0;
+
+	list_for_each_entry(evsel, &evlist->entries, node) {
+		printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "",
+				   perf_evsel__name(evsel));
+	}
+
+	return printed + fprintf(fp, "\n");
+}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 2ed2557..3f2e1e4 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -143,4 +143,6 @@
 {
 	return list_entry(evlist->entries.prev, struct perf_evsel, node);
 }
+
+size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp);
 #endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 7ff3c8f..1506ba0 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -10,6 +10,7 @@
 #include <byteswap.h>
 #include <linux/bitops.h>
 #include "asm/bug.h"
+#include "event-parse.h"
 #include "evsel.h"
 #include "evlist.h"
 #include "util.h"
@@ -68,7 +69,7 @@
 	return evsel;
 }
 
-static const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX] = {
+const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX] = {
 	"cycles",
 	"instructions",
 	"cache-references",
@@ -131,12 +132,12 @@
 	return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
 }
 
-static const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX] = {
+const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX] = {
 	"cpu-clock",
 	"task-clock",
 	"page-faults",
 	"context-switches",
-	"CPU-migrations",
+	"cpu-migrations",
 	"minor-faults",
 	"major-faults",
 	"alignment-faults",
@@ -1000,3 +1001,37 @@
 
 	return 0;
 }
+
+char *perf_evsel__strval(struct perf_evsel *evsel, struct perf_sample *sample,
+			 const char *name)
+{
+	struct format_field *field = pevent_find_field(evsel->tp_format, name);
+	int offset;
+
+	if (!field)
+		return NULL;
+
+	offset = field->offset;
+
+	if (field->flags & FIELD_IS_DYNAMIC) {
+		offset = *(int *)(sample->raw_data + field->offset);
+		offset &= 0xffff;
+	}
+
+	return sample->raw_data + offset;
+}
+
+u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
+		       const char *name)
+{
+	struct format_field *field = pevent_find_field(evsel->tp_format, name);
+	u64 val;
+
+	if (!field)
+		return 0;
+
+	val = pevent_read_number(evsel->tp_format->pevent,
+				 sample->raw_data + field->offset, field->size);
+	return val;
+
+}
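
perf_evsel__intval() and perf_evsel__strval() pull typed tracepoint fields straight out of a sample's raw data: numbers are read at field->offset via pevent_read_number(), while dynamic string fields (ftrace's __data_loc encoding) store a 32-bit descriptor whose low 16 bits are the real payload offset. A stand-alone sketch of that descriptor decode; the hand-built record layout and the "bash" string are fabricated for illustration.

#include <stdio.h>
#include <string.h>
#include <stdint.h>

int main(void)
{
	unsigned char raw[32] = { 0 };
	uint32_t desc;
	int offset;

	/* fake record: 4-byte descriptor at offset 0, string at offset 8 */
	memcpy(raw + 8, "bash", 5);
	desc = (5u << 16) | 8u;			/* len = 5, offset = 8 */
	memcpy(raw, &desc, sizeof(desc));

	memcpy(&desc, raw + 0, sizeof(desc));	/* field->offset == 0 */
	offset = desc & 0xffff;			/* low 16 bits -> data offset */

	printf("comm = %s\n", (char *)raw + offset);
	return 0;
}
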
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 94f6ba1..93876ba 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -66,6 +66,7 @@
 		void		*func;
 		void		*data;
 	} handler;
+	struct cpu_map		*cpus;
 	unsigned int		sample_size;
 	bool 			supported;
 	/* parse modifier helper */
@@ -97,8 +98,10 @@
 				       [PERF_EVSEL__MAX_ALIASES];
 extern const char *perf_evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX]
 					  [PERF_EVSEL__MAX_ALIASES];
-const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX]
-				       [PERF_EVSEL__MAX_ALIASES];
+extern const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX]
+					      [PERF_EVSEL__MAX_ALIASES];
+extern const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX];
+extern const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX];
 int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result,
 					    char *bf, size_t size);
 const char *perf_evsel__name(struct perf_evsel *evsel);
@@ -118,10 +121,24 @@
 		     struct thread_map *threads);
 void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads);
 
+struct perf_sample;
+
+char *perf_evsel__strval(struct perf_evsel *evsel, struct perf_sample *sample,
+			 const char *name);
+u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
+		       const char *name);
+
 #define perf_evsel__match(evsel, t, c)		\
 	(evsel->attr.type == PERF_TYPE_##t &&	\
 	 evsel->attr.config == PERF_COUNT_##c)
 
+static inline bool perf_evsel__match2(struct perf_evsel *e1,
+				      struct perf_evsel *e2)
+{
+	return (e1->attr.type == e2->attr.type) &&
+	       (e1->attr.config == e2->attr.config);
+}
+
 int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
 			      int cpu, int thread, bool scale);
 
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 9696e64..acbf633 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -21,6 +21,7 @@
 #include "debug.h"
 #include "cpumap.h"
 #include "pmu.h"
+#include "vdso.h"
 
 static bool no_buildid_cache = false;
 
@@ -129,7 +130,7 @@
 	int ret;
 
 	olen = strlen(str) + 1;
-	len = ALIGN(olen, NAME_ALIGN);
+	len = PERF_ALIGN(olen, NAME_ALIGN);
 
 	/* write len, incl. \0 */
 	ret = do_write(fd, &len, sizeof(len));
@@ -207,6 +208,29 @@
 			continue;		\
 		else
 
+static int write_buildid(char *name, size_t name_len, u8 *build_id,
+			 pid_t pid, u16 misc, int fd)
+{
+	int err;
+	struct build_id_event b;
+	size_t len;
+
+	len = name_len + 1;
+	len = PERF_ALIGN(len, NAME_ALIGN);
+
+	memset(&b, 0, sizeof(b));
+	memcpy(&b.build_id, build_id, BUILD_ID_SIZE);
+	b.pid = pid;
+	b.header.misc = misc;
+	b.header.size = sizeof(b) + len;
+
+	err = do_write(fd, &b, sizeof(b));
+	if (err < 0)
+		return err;
+
+	return write_padded(fd, name, name_len + 1, len);
+}
+
 static int __dsos__write_buildid_table(struct list_head *head, pid_t pid,
 				u16 misc, int fd)
 {
@@ -214,24 +238,23 @@
 
 	dsos__for_each_with_build_id(pos, head) {
 		int err;
-		struct build_id_event b;
-		size_t len;
+		char  *name;
+		size_t name_len;
 
 		if (!pos->hit)
 			continue;
-		len = pos->long_name_len + 1;
-		len = ALIGN(len, NAME_ALIGN);
-		memset(&b, 0, sizeof(b));
-		memcpy(&b.build_id, pos->build_id, sizeof(pos->build_id));
-		b.pid = pid;
-		b.header.misc = misc;
-		b.header.size = sizeof(b) + len;
-		err = do_write(fd, &b, sizeof(b));
-		if (err < 0)
-			return err;
-		err = write_padded(fd, pos->long_name,
-				   pos->long_name_len + 1, len);
-		if (err < 0)
+
+		if (is_vdso_map(pos->short_name)) {
+			name = (char *) VDSO__MAP_NAME;
+			name_len = sizeof(VDSO__MAP_NAME) + 1;
+		} else {
+			name = pos->long_name;
+			name_len = pos->long_name_len + 1;
+		}
+
+		err = write_buildid(name, name_len, pos->build_id,
+				    pid, misc, fd);
+		if (err)
 			return err;
 	}
 
@@ -277,19 +300,20 @@
 }
 
 int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
-			  const char *name, bool is_kallsyms)
+			  const char *name, bool is_kallsyms, bool is_vdso)
 {
 	const size_t size = PATH_MAX;
 	char *realname, *filename = zalloc(size),
 	     *linkname = zalloc(size), *targetname;
 	int len, err = -1;
+	bool slash = is_kallsyms || is_vdso;
 
 	if (is_kallsyms) {
 		if (symbol_conf.kptr_restrict) {
 			pr_debug("Not caching a kptr_restrict'ed /proc/kallsyms\n");
 			return 0;
 		}
-		realname = (char *)name;
+		realname = (char *) name;
 	} else
 		realname = realpath(name, NULL);
 
@@ -297,7 +321,8 @@
 		goto out_free;
 
 	len = scnprintf(filename, size, "%s%s%s",
-		       debugdir, is_kallsyms ? "/" : "", realname);
+		       debugdir, slash ? "/" : "",
+		       is_vdso ? VDSO__MAP_NAME : realname);
 	if (mkdir_p(filename, 0755))
 		goto out_free;
 
@@ -333,13 +358,14 @@
 
 static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size,
 				 const char *name, const char *debugdir,
-				 bool is_kallsyms)
+				 bool is_kallsyms, bool is_vdso)
 {
 	char sbuild_id[BUILD_ID_SIZE * 2 + 1];
 
 	build_id__sprintf(build_id, build_id_size, sbuild_id);
 
-	return build_id_cache__add_s(sbuild_id, debugdir, name, is_kallsyms);
+	return build_id_cache__add_s(sbuild_id, debugdir, name,
+				     is_kallsyms, is_vdso);
 }
 
 int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir)
@@ -383,9 +409,11 @@
 static int dso__cache_build_id(struct dso *dso, const char *debugdir)
 {
 	bool is_kallsyms = dso->kernel && dso->long_name[0] != '/';
+	bool is_vdso = is_vdso_map(dso->short_name);
 
 	return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id),
-				     dso->long_name, debugdir, is_kallsyms);
+				     dso->long_name, debugdir,
+				     is_kallsyms, is_vdso);
 }
 
 static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir)
@@ -447,7 +475,7 @@
 	return ret;
 }
 
-static int write_tracing_data(int fd, struct perf_header *h __used,
+static int write_tracing_data(int fd, struct perf_header *h __maybe_unused,
 			    struct perf_evlist *evlist)
 {
 	return read_tracing_data(fd, &evlist->entries);
@@ -455,7 +483,7 @@
 
 
 static int write_build_id(int fd, struct perf_header *h,
-			  struct perf_evlist *evlist __used)
+			  struct perf_evlist *evlist __maybe_unused)
 {
 	struct perf_session *session;
 	int err;
@@ -476,8 +504,8 @@
 	return 0;
 }
 
-static int write_hostname(int fd, struct perf_header *h __used,
-			  struct perf_evlist *evlist __used)
+static int write_hostname(int fd, struct perf_header *h __maybe_unused,
+			  struct perf_evlist *evlist __maybe_unused)
 {
 	struct utsname uts;
 	int ret;
@@ -489,8 +517,8 @@
 	return do_write_string(fd, uts.nodename);
 }
 
-static int write_osrelease(int fd, struct perf_header *h __used,
-			   struct perf_evlist *evlist __used)
+static int write_osrelease(int fd, struct perf_header *h __maybe_unused,
+			   struct perf_evlist *evlist __maybe_unused)
 {
 	struct utsname uts;
 	int ret;
@@ -502,8 +530,8 @@
 	return do_write_string(fd, uts.release);
 }
 
-static int write_arch(int fd, struct perf_header *h __used,
-		      struct perf_evlist *evlist __used)
+static int write_arch(int fd, struct perf_header *h __maybe_unused,
+		      struct perf_evlist *evlist __maybe_unused)
 {
 	struct utsname uts;
 	int ret;
@@ -515,14 +543,14 @@
 	return do_write_string(fd, uts.machine);
 }
 
-static int write_version(int fd, struct perf_header *h __used,
-			 struct perf_evlist *evlist __used)
+static int write_version(int fd, struct perf_header *h __maybe_unused,
+			 struct perf_evlist *evlist __maybe_unused)
 {
 	return do_write_string(fd, perf_version_string);
 }
 
-static int write_cpudesc(int fd, struct perf_header *h __used,
-		       struct perf_evlist *evlist __used)
+static int write_cpudesc(int fd, struct perf_header *h __maybe_unused,
+		       struct perf_evlist *evlist __maybe_unused)
 {
 #ifndef CPUINFO_PROC
 #define CPUINFO_PROC NULL
@@ -580,8 +608,8 @@
 	return ret;
 }
 
-static int write_nrcpus(int fd, struct perf_header *h __used,
-			struct perf_evlist *evlist __used)
+static int write_nrcpus(int fd, struct perf_header *h __maybe_unused,
+			struct perf_evlist *evlist __maybe_unused)
 {
 	long nr;
 	u32 nrc, nra;
@@ -606,15 +634,14 @@
 	return do_write(fd, &nra, sizeof(nra));
 }
 
-static int write_event_desc(int fd, struct perf_header *h __used,
+static int write_event_desc(int fd, struct perf_header *h __maybe_unused,
 			    struct perf_evlist *evlist)
 {
 	struct perf_evsel *evsel;
-	u32 nre = 0, nri, sz;
+	u32 nre, nri, sz;
 	int ret;
 
-	list_for_each_entry(evsel, &evlist->entries, node)
-		nre++;
+	nre = evlist->nr_entries;
 
 	/*
 	 * write number of events
@@ -664,8 +691,8 @@
 	return 0;
 }
 
-static int write_cmdline(int fd, struct perf_header *h __used,
-			 struct perf_evlist *evlist __used)
+static int write_cmdline(int fd, struct perf_header *h __maybe_unused,
+			 struct perf_evlist *evlist __maybe_unused)
 {
 	char buf[MAXPATHLEN];
 	char proc[32];
@@ -833,8 +860,8 @@
 	return tp;
 }
 
-static int write_cpu_topology(int fd, struct perf_header *h __used,
-			  struct perf_evlist *evlist __used)
+static int write_cpu_topology(int fd, struct perf_header *h __maybe_unused,
+			  struct perf_evlist *evlist __maybe_unused)
 {
 	struct cpu_topo *tp;
 	u32 i;
@@ -869,8 +896,8 @@
 
 
 
-static int write_total_mem(int fd, struct perf_header *h __used,
-			  struct perf_evlist *evlist __used)
+static int write_total_mem(int fd, struct perf_header *h __maybe_unused,
+			  struct perf_evlist *evlist __maybe_unused)
 {
 	char *buf = NULL;
 	FILE *fp;
@@ -955,8 +982,8 @@
 	return ret;
 }
 
-static int write_numa_topology(int fd, struct perf_header *h __used,
-			  struct perf_evlist *evlist __used)
+static int write_numa_topology(int fd, struct perf_header *h __maybe_unused,
+			  struct perf_evlist *evlist __maybe_unused)
 {
 	char *buf = NULL;
 	size_t len = 0;
@@ -1016,8 +1043,8 @@
  * };
  */
 
-static int write_pmu_mappings(int fd, struct perf_header *h __used,
-			      struct perf_evlist *evlist __used)
+static int write_pmu_mappings(int fd, struct perf_header *h __maybe_unused,
+			      struct perf_evlist *evlist __maybe_unused)
 {
 	struct perf_pmu *pmu = NULL;
 	off_t offset = lseek(fd, 0, SEEK_CUR);
@@ -1047,13 +1074,14 @@
  * default get_cpuid(): nothing gets recorded
  * actual implementation must be in arch/$(ARCH)/util/header.c
  */
-int __attribute__((weak)) get_cpuid(char *buffer __used, size_t sz __used)
+int __attribute__ ((weak)) get_cpuid(char *buffer __maybe_unused,
+				     size_t sz __maybe_unused)
 {
 	return -1;
 }
 
-static int write_cpuid(int fd, struct perf_header *h __used,
-		       struct perf_evlist *evlist __used)
+static int write_cpuid(int fd, struct perf_header *h __maybe_unused,
+		       struct perf_evlist *evlist __maybe_unused)
 {
 	char buffer[64];
 	int ret;
@@ -1067,8 +1095,9 @@
 	return do_write_string(fd, buffer);
 }
 
-static int write_branch_stack(int fd __used, struct perf_header *h __used,
-		       struct perf_evlist *evlist __used)
+static int write_branch_stack(int fd __maybe_unused,
+			      struct perf_header *h __maybe_unused,
+		       struct perf_evlist *evlist __maybe_unused)
 {
 	return 0;
 }
@@ -1345,7 +1374,8 @@
 	free_event_desc(events);
 }
 
-static void print_total_mem(struct perf_header *h __used, int fd, FILE *fp)
+static void print_total_mem(struct perf_header *h __maybe_unused, int fd,
+			    FILE *fp)
 {
 	uint64_t mem;
 	ssize_t ret;
@@ -1363,7 +1393,8 @@
 	fprintf(fp, "# total memory : unknown\n");
 }
 
-static void print_numa_topology(struct perf_header *h __used, int fd, FILE *fp)
+static void print_numa_topology(struct perf_header *h __maybe_unused, int fd,
+				FILE *fp)
 {
 	ssize_t ret;
 	u32 nr, c, i;
@@ -1423,7 +1454,8 @@
 	free(str);
 }
 
-static void print_branch_stack(struct perf_header *ph __used, int fd __used,
+static void print_branch_stack(struct perf_header *ph __maybe_unused,
+			       int fd __maybe_unused,
 			       FILE *fp)
 {
 	fprintf(fp, "# contains samples with branch stack\n");
@@ -1441,6 +1473,9 @@
 	if (ret != sizeof(pmu_num))
 		goto error;
 
+	if (ph->needs_swap)
+		pmu_num = bswap_32(pmu_num);
+
 	if (!pmu_num) {
 		fprintf(fp, "# pmu mappings: not available\n");
 		return;
@@ -1449,6 +1484,9 @@
 	while (pmu_num) {
 		if (read(fd, &type, sizeof(type)) != sizeof(type))
 			break;
+		if (ph->needs_swap)
+			type = bswap_32(type);
+
 		name = do_read_string(fd, ph);
 		if (!name)
 			break;
@@ -1527,7 +1565,7 @@
 	struct perf_session *session = container_of(header, struct perf_session, header);
 	struct {
 		struct perf_event_header   header;
-		u8			   build_id[ALIGN(BUILD_ID_SIZE, sizeof(u64))];
+		u8			   build_id[PERF_ALIGN(BUILD_ID_SIZE, sizeof(u64))];
 		char			   filename[0];
 	} old_bev;
 	struct build_id_event bev;
@@ -1616,9 +1654,10 @@
 	return err;
 }
 
-static int process_tracing_data(struct perf_file_section *section __unused,
-			      struct perf_header *ph __unused,
-			      int feat __unused, int fd, void *data)
+static int process_tracing_data(struct perf_file_section *section
+				__maybe_unused,
+			      struct perf_header *ph __maybe_unused,
+			      int feat __maybe_unused, int fd, void *data)
 {
 	trace_report(fd, data, false);
 	return 0;
@@ -1626,7 +1665,8 @@
 
 static int process_build_id(struct perf_file_section *section,
 			    struct perf_header *ph,
-			    int feat __unused, int fd, void *data __used)
+			    int feat __maybe_unused, int fd,
+			    void *data __maybe_unused)
 {
 	if (perf_header__read_build_ids(ph, fd, section->offset, section->size))
 		pr_debug("Failed to read buildids, continuing...\n");
@@ -1665,9 +1705,9 @@
 }
 
 static int
-process_event_desc(struct perf_file_section *section __unused,
-		   struct perf_header *header, int feat __unused, int fd,
-		   void *data __used)
+process_event_desc(struct perf_file_section *section __maybe_unused,
+		   struct perf_header *header, int feat __maybe_unused, int fd,
+		   void *data __maybe_unused)
 {
 	struct perf_session *session = container_of(header, struct perf_session, header);
 	struct perf_evsel *evsel, *events = read_event_desc(header, fd);
@@ -2290,33 +2330,39 @@
 	return ret <= 0 ? -1 : 0;
 }
 
-static int perf_evsel__set_tracepoint_name(struct perf_evsel *evsel,
-					   struct pevent *pevent)
+static int perf_evsel__prepare_tracepoint_event(struct perf_evsel *evsel,
+						struct pevent *pevent)
 {
-	struct event_format *event = pevent_find_event(pevent,
-						       evsel->attr.config);
+	struct event_format *event;
 	char bf[128];
 
+	/* already prepared */
+	if (evsel->tp_format)
+		return 0;
+
+	event = pevent_find_event(pevent, evsel->attr.config);
 	if (event == NULL)
 		return -1;
 
-	snprintf(bf, sizeof(bf), "%s:%s", event->system, event->name);
-	evsel->name = strdup(bf);
-	if (event->name == NULL)
-		return -1;
+	if (!evsel->name) {
+		snprintf(bf, sizeof(bf), "%s:%s", event->system, event->name);
+		evsel->name = strdup(bf);
+		if (evsel->name == NULL)
+			return -1;
+	}
 
 	evsel->tp_format = event;
 	return 0;
 }
 
-static int perf_evlist__set_tracepoint_names(struct perf_evlist *evlist,
-					     struct pevent *pevent)
+static int perf_evlist__prepare_tracepoint_events(struct perf_evlist *evlist,
+						  struct pevent *pevent)
 {
 	struct perf_evsel *pos;
 
 	list_for_each_entry(pos, &evlist->entries, node) {
 		if (pos->attr.type == PERF_TYPE_TRACEPOINT &&
-		    perf_evsel__set_tracepoint_name(pos, pevent))
+		    perf_evsel__prepare_tracepoint_event(pos, pevent))
 			return -1;
 	}
 
@@ -2404,7 +2450,8 @@
 
 	lseek(fd, header->data_offset, SEEK_SET);
 
-	if (perf_evlist__set_tracepoint_names(session->evlist, session->pevent))
+	if (perf_evlist__prepare_tracepoint_events(session->evlist,
+						   session->pevent))
 		goto out_delete_evlist;
 
 	header->frozen = 1;
@@ -2427,7 +2474,7 @@
 	int err;
 
 	size = sizeof(struct perf_event_attr);
-	size = ALIGN(size, sizeof(u64));
+	size = PERF_ALIGN(size, sizeof(u64));
 	size += sizeof(struct perf_event_header);
 	size += ids * sizeof(u64);
 
@@ -2525,7 +2572,7 @@
 
 	ev.event_type.header.type = PERF_RECORD_HEADER_EVENT_TYPE;
 	size = strlen(ev.event_type.event_type.name);
-	size = ALIGN(size, sizeof(u64));
+	size = PERF_ALIGN(size, sizeof(u64));
 	ev.event_type.header.size = sizeof(ev.event_type) -
 		(sizeof(ev.event_type.event_type.name) - size);
 
@@ -2556,7 +2603,7 @@
 	return err;
 }
 
-int perf_event__process_event_type(struct perf_tool *tool __unused,
+int perf_event__process_event_type(struct perf_tool *tool __maybe_unused,
 				   union perf_event *event)
 {
 	if (perf_header__push_event(event->event_type.event_type.event_id,
@@ -2573,7 +2620,7 @@
 	union perf_event ev;
 	struct tracing_data *tdata;
 	ssize_t size = 0, aligned_size = 0, padding;
-	int err __used = 0;
+	int err __maybe_unused = 0;
 
 	/*
 	 * We are going to store the size of the data followed
@@ -2594,7 +2641,7 @@
 
 	ev.tracing_data.header.type = PERF_RECORD_HEADER_TRACING_DATA;
 	size = tdata->size;
-	aligned_size = ALIGN(size, sizeof(u64));
+	aligned_size = PERF_ALIGN(size, sizeof(u64));
 	padding = aligned_size - size;
 	ev.tracing_data.header.size = sizeof(ev.tracing_data);
 	ev.tracing_data.size = aligned_size;
@@ -2625,7 +2672,7 @@
 
 	size_read = trace_report(session->fd, &session->pevent,
 				 session->repipe);
-	padding = ALIGN(size_read, sizeof(u64)) - size_read;
+	padding = PERF_ALIGN(size_read, sizeof(u64)) - size_read;
 
 	if (read(session->fd, buf, padding) < 0)
 		die("reading input file");
@@ -2638,7 +2685,8 @@
 	if (size_read + padding != size)
 		die("tracing data size mismatch");
 
-	perf_evlist__set_tracepoint_names(session->evlist, session->pevent);
+	perf_evlist__prepare_tracepoint_events(session->evlist,
+					       session->pevent);
 
 	return size_read + padding;
 }
@@ -2658,7 +2706,7 @@
 	memset(&ev, 0, sizeof(ev));
 
 	len = pos->long_name_len + 1;
-	len = ALIGN(len, NAME_ALIGN);
+	len = PERF_ALIGN(len, NAME_ALIGN);
 	memcpy(&ev.build_id.build_id, pos->build_id, sizeof(pos->build_id));
 	ev.build_id.header.type = PERF_RECORD_HEADER_BUILD_ID;
 	ev.build_id.header.misc = misc;
@@ -2671,7 +2719,7 @@
 	return err;
 }
 
-int perf_event__process_build_id(struct perf_tool *tool __used,
+int perf_event__process_build_id(struct perf_tool *tool __maybe_unused,
 				 union perf_event *event,
 				 struct perf_session *session)
 {
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 9d5eedc..209dad4 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -96,7 +96,7 @@
 int perf_header__fprintf_info(struct perf_session *s, FILE *fp, bool full);
 
 int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
-			  const char *name, bool is_kallsyms);
+			  const char *name, bool is_kallsyms, bool is_vdso);
 int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir);
 
 int perf_event__synthesize_attr(struct perf_tool *tool,
diff --git a/tools/perf/util/help.c b/tools/perf/util/help.c
index 6f2975a..8b1f6e8 100644
--- a/tools/perf/util/help.c
+++ b/tools/perf/util/help.c
@@ -3,6 +3,7 @@
 #include "exec_cmd.h"
 #include "levenshtein.h"
 #include "help.h"
+#include <termios.h>
 
 void add_cmdname(struct cmdnames *cmds, const char *name, size_t len)
 {
@@ -331,7 +332,8 @@
 	exit(1);
 }
 
-int cmd_version(int argc __used, const char **argv __used, const char *prefix __used)
+int cmd_version(int argc __maybe_unused, const char **argv __maybe_unused,
+		const char *prefix __maybe_unused)
 {
 	printf("perf version %s\n", perf_version_string);
 	return 0;
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index b1817f1..6ec5398 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -394,7 +394,7 @@
  * collapse the histogram
  */
 
-static bool hists__collapse_insert_entry(struct hists *hists __used,
+static bool hists__collapse_insert_entry(struct hists *hists __maybe_unused,
 					 struct rb_root *root,
 					 struct hist_entry *he)
 {
@@ -563,39 +563,6 @@
 	return __hists__output_resort(hists, true);
 }
 
-/*
- * See hists__fprintf to match the column widths
- */
-unsigned int hists__sort_list_width(struct hists *hists)
-{
-	struct sort_entry *se;
-	int ret = 9; /* total % */
-
-	if (symbol_conf.show_cpu_utilization) {
-		ret += 7; /* count_sys % */
-		ret += 6; /* count_us % */
-		if (perf_guest) {
-			ret += 13; /* count_guest_sys % */
-			ret += 12; /* count_guest_us % */
-		}
-	}
-
-	if (symbol_conf.show_nr_samples)
-		ret += 11;
-
-	if (symbol_conf.show_total_period)
-		ret += 13;
-
-	list_for_each_entry(se, &hist_entry__sort_list, list)
-		if (!se->elide)
-			ret += 2 + hists__col_len(hists, se->se_width_idx);
-
-	if (verbose) /* Addr + origin */
-		ret += 3 + BITS_PER_LONG / 4;
-
-	return ret;
-}
-
 static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h,
 				       enum hist_filter filter)
 {
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 2e650ff..f011ad4 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -115,24 +115,63 @@
 void hists__reset_col_len(struct hists *hists);
 void hists__calc_col_len(struct hists *hists, struct hist_entry *he);
 
+struct perf_hpp {
+	char *buf;
+	size_t size;
+	u64 total_period;
+	const char *sep;
+	long displacement;
+	void *ptr;
+};
+
+struct perf_hpp_fmt {
+	bool cond;
+	int (*header)(struct perf_hpp *hpp);
+	int (*width)(struct perf_hpp *hpp);
+	int (*color)(struct perf_hpp *hpp, struct hist_entry *he);
+	int (*entry)(struct perf_hpp *hpp, struct hist_entry *he);
+};
+
+extern struct perf_hpp_fmt perf_hpp__format[];
+
+enum {
+	PERF_HPP__OVERHEAD,
+	PERF_HPP__OVERHEAD_SYS,
+	PERF_HPP__OVERHEAD_US,
+	PERF_HPP__OVERHEAD_GUEST_SYS,
+	PERF_HPP__OVERHEAD_GUEST_US,
+	PERF_HPP__SAMPLES,
+	PERF_HPP__PERIOD,
+	PERF_HPP__DELTA,
+	PERF_HPP__DISPL,
+
+	PERF_HPP__MAX_INDEX
+};
+
+void perf_hpp__init(bool need_pair, bool show_displacement);
+int hist_entry__period_snprintf(struct perf_hpp *hpp, struct hist_entry *he,
+				bool color);
+
 struct perf_evlist;
 
 #ifdef NO_NEWT_SUPPORT
 static inline
-int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __used,
-				  const char *help __used,
-				  void(*timer)(void *arg) __used,
-				  void *arg __used,
-				  int refresh __used)
+int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __maybe_unused,
+				  const char *help __maybe_unused,
+				  void(*timer)(void *arg) __maybe_unused,
+				  void *arg __maybe_unused,
+				  int refresh __maybe_unused)
 {
 	return 0;
 }
 
-static inline int hist_entry__tui_annotate(struct hist_entry *self __used,
-					   int evidx __used,
-					   void(*timer)(void *arg) __used,
-					   void *arg __used,
-					   int delay_secs __used)
+static inline int hist_entry__tui_annotate(struct hist_entry *self
+					   __maybe_unused,
+					   int evidx __maybe_unused,
+					   void(*timer)(void *arg)
+					   __maybe_unused,
+					   void *arg __maybe_unused,
+					   int delay_secs __maybe_unused)
 {
 	return 0;
 }
@@ -150,11 +189,11 @@
 
 #ifdef NO_GTK2_SUPPORT
 static inline
-int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist __used,
-				  const char *help __used,
-				  void(*timer)(void *arg) __used,
-				  void *arg __used,
-				  int refresh __used)
+int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist __maybe_unused,
+				  const char *help __maybe_unused,
+				  void(*timer)(void *arg) __maybe_unused,
+				  void *arg __maybe_unused,
+				  int refresh __maybe_unused)
 {
 	return 0;
 }
diff --git a/tools/perf/util/include/linux/bitops.h b/tools/perf/util/include/linux/bitops.h
index 587a230..a55d8cf0 100644
--- a/tools/perf/util/include/linux/bitops.h
+++ b/tools/perf/util/include/linux/bitops.h
@@ -5,6 +5,10 @@
 #include <linux/compiler.h>
 #include <asm/hweight.h>
 
+#ifndef __WORDSIZE
+#define __WORDSIZE (__SIZEOF_LONG__ * 8)
+#endif
+
 #define BITS_PER_LONG __WORDSIZE
 #define BITS_PER_BYTE           8
 #define BITS_TO_LONGS(nr)       DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
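
The __WORDSIZE fallback added above derives the word size from the compiler's __SIZEOF_LONG__ builtin (a GCC/Clang predefined macro), so BITS_PER_LONG stays usable even on C libraries that do not expose __WORDSIZE. A minimal sketch of that equivalence, assuming a C11 compiler for _Static_assert and an 8-bit byte:

#include <limits.h>

#ifndef __WORDSIZE
#define __WORDSIZE (__SIZEOF_LONG__ * 8)	/* same fallback as above */
#endif
#define BITS_PER_LONG __WORDSIZE

/* e.g. 64 on x86_64, 32 on a 32-bit ARM build */
_Static_assert(BITS_PER_LONG == sizeof(long) * CHAR_BIT,
	       "BITS_PER_LONG must match the native long width");
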
diff --git a/tools/perf/util/include/linux/compiler.h b/tools/perf/util/include/linux/compiler.h
index 2dc8671..96b919d 100644
--- a/tools/perf/util/include/linux/compiler.h
+++ b/tools/perf/util/include/linux/compiler.h
@@ -9,7 +9,13 @@
 #define __attribute_const__
 #endif
 
-#define __used		__attribute__((__unused__))
+#ifndef __maybe_unused
+#define __maybe_unused		__attribute__((unused))
+#endif
 #define __packed	__attribute__((__packed__))
 
+#ifndef __force
+#define __force
+#endif
+
 #endif
diff --git a/tools/perf/util/include/linux/kernel.h b/tools/perf/util/include/linux/kernel.h
index b6842c1..d8c927c 100644
--- a/tools/perf/util/include/linux/kernel.h
+++ b/tools/perf/util/include/linux/kernel.h
@@ -8,8 +8,8 @@
 
 #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
 
-#define ALIGN(x,a)		__ALIGN_MASK(x,(typeof(x))(a)-1)
-#define __ALIGN_MASK(x,mask)	(((x)+(mask))&~(mask))
+#define PERF_ALIGN(x, a)	__PERF_ALIGN_MASK(x, (typeof(x))(a)-1)
+#define __PERF_ALIGN_MASK(x, mask)	(((x)+(mask))&~(mask))
 
 #ifndef offsetof
 #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
@@ -46,9 +46,22 @@
 	_min1 < _min2 ? _min1 : _min2; })
 #endif
 
+#ifndef roundup
+#define roundup(x, y) (                                \
+{                                                      \
+	const typeof(y) __y = y;		       \
+	(((x) + (__y - 1)) / __y) * __y;	       \
+}                                                      \
+)
+#endif
+
 #ifndef BUG_ON
+#ifdef NDEBUG
+#define BUG_ON(cond) do { if (cond) {} } while (0)
+#else
 #define BUG_ON(cond) assert(!(cond))
 #endif
+#endif
 
 /*
  * Both need more care to handle endianness
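
PERF_ALIGN() is the former ALIGN() under a perf-private name, presumably renamed to avoid colliding with other headers that also define ALIGN; it rounds up to a power-of-two boundary, while the new roundup() handles arbitrary positive multiples. A small standalone illustration, reproducing the definitions above (both rely on GCC-style typeof and statement expressions, as in the originals):

#include <assert.h>

#define PERF_ALIGN(x, a)	__PERF_ALIGN_MASK(x, (typeof(x))(a)-1)
#define __PERF_ALIGN_MASK(x, mask)	(((x)+(mask))&~(mask))

#ifndef roundup
#define roundup(x, y) (					\
{							\
	const typeof(y) __y = y;			\
	(((x) + (__y - 1)) / __y) * __y;		\
}							\
)
#endif

int main(void)
{
	assert(PERF_ALIGN(13UL, 8) == 16UL);	/* power-of-two alignment */
	assert(PERF_ALIGN(16UL, 8) == 16UL);	/* already aligned: unchanged */
	assert(roundup(13, 5) == 15);		/* works for non-power-of-two y */
	return 0;
}
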
diff --git a/tools/perf/util/include/linux/magic.h b/tools/perf/util/include/linux/magic.h
new file mode 100644
index 0000000..58b64ed
--- /dev/null
+++ b/tools/perf/util/include/linux/magic.h
@@ -0,0 +1,12 @@
+#ifndef _PERF_LINUX_MAGIC_H_
+#define _PERF_LINUX_MAGIC_H_
+
+#ifndef DEBUGFS_MAGIC
+#define DEBUGFS_MAGIC          0x64626720
+#endif
+
+#ifndef SYSFS_MAGIC
+#define SYSFS_MAGIC            0x62656572
+#endif
+
+#endif
diff --git a/tools/perf/util/include/linux/rbtree.h b/tools/perf/util/include/linux/rbtree.h
index 7a243a1..2a030c5 100644
--- a/tools/perf/util/include/linux/rbtree.h
+++ b/tools/perf/util/include/linux/rbtree.h
@@ -1 +1,2 @@
+#include <stdbool.h>
 #include "../../../../include/linux/rbtree.h"
diff --git a/tools/perf/util/include/linux/string.h b/tools/perf/util/include/linux/string.h
index 3b2f590..6f19c54 100644
--- a/tools/perf/util/include/linux/string.h
+++ b/tools/perf/util/include/linux/string.h
@@ -1 +1,3 @@
 #include <string.h>
+
+void *memdup(const void *src, size_t len);
diff --git a/tools/perf/util/include/linux/types.h b/tools/perf/util/include/linux/types.h
index 12de3b8..eb46478 100644
--- a/tools/perf/util/include/linux/types.h
+++ b/tools/perf/util/include/linux/types.h
@@ -3,6 +3,14 @@
 
 #include <asm/types.h>
 
+#ifndef __bitwise
+#define __bitwise
+#endif
+
+#ifndef __le32
+typedef __u32 __bitwise __le32;
+#endif
+
 #define DECLARE_BITMAP(name,bits) \
 	unsigned long name[BITS_TO_LONGS(bits)]
 
diff --git a/tools/perf/util/intlist.c b/tools/perf/util/intlist.c
index fd530dc..9d07400 100644
--- a/tools/perf/util/intlist.c
+++ b/tools/perf/util/intlist.c
@@ -11,7 +11,7 @@
 
 #include "intlist.h"
 
-static struct rb_node *intlist__node_new(struct rblist *rblist __used,
+static struct rb_node *intlist__node_new(struct rblist *rblist __maybe_unused,
 					 const void *entry)
 {
 	int i = (int)((long)entry);
@@ -31,7 +31,7 @@
 	free(ilist);
 }
 
-static void intlist__node_delete(struct rblist *rblist __used,
+static void intlist__node_delete(struct rblist *rblist __maybe_unused,
 				 struct rb_node *rb_node)
 {
 	struct int_node *node = container_of(rb_node, struct int_node, rb_node);
@@ -52,9 +52,9 @@
 	return rblist__add_node(&ilist->rblist, (void *)((long)i));
 }
 
-void intlist__remove(struct intlist *ilist __used, struct int_node *node)
+void intlist__remove(struct intlist *ilist, struct int_node *node)
 {
-	int_node__delete(node);
+	rblist__remove_node(&ilist->rblist, &node->rb_node);
 }
 
 struct int_node *intlist__find(struct intlist *ilist, int i)
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 7d37159..b442ee4 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -9,6 +9,7 @@
 #include "map.h"
 #include "thread.h"
 #include "strlist.h"
+#include "vdso.h"
 
 const char *map_type__name[MAP__NR_TYPES] = {
 	[MAP__FUNCTION] = "Functions",
@@ -23,7 +24,6 @@
 static inline int is_no_dso_memory(const char *filename)
 {
 	return !strcmp(filename, "[stack]") ||
-	       !strcmp(filename, "[vdso]")  ||
 	       !strcmp(filename, "[heap]");
 }
 
@@ -52,9 +52,10 @@
 	if (self != NULL) {
 		char newfilename[PATH_MAX];
 		struct dso *dso;
-		int anon, no_dso;
+		int anon, no_dso, vdso;
 
 		anon = is_anon_memory(filename);
+		vdso = is_vdso_map(filename);
 		no_dso = is_no_dso_memory(filename);
 
 		if (anon) {
@@ -62,7 +63,12 @@
 			filename = newfilename;
 		}
 
-		dso = __dsos__findnew(dsos__list, filename);
+		if (vdso) {
+			pgoff = 0;
+			dso = vdso__dso_findnew(dsos__list);
+		} else
+			dso = __dsos__findnew(dsos__list, filename);
+
 		if (dso == NULL)
 			goto out_delete;
 
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 25ab4cd..d2250fc 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -96,7 +96,7 @@
 	return ip + map->start - map->pgoff;
 }
 
-static inline u64 identity__map_ip(struct map *map __used, u64 ip)
+static inline u64 identity__map_ip(struct map *map __maybe_unused, u64 ip)
 {
 	return ip;
 }
diff --git a/tools/perf/util/parse-events-test.c b/tools/perf/util/parse-events-test.c
index bc8b651..d7244e5 100644
--- a/tools/perf/util/parse-events-test.c
+++ b/tools/perf/util/parse-events-test.c
@@ -569,7 +569,7 @@
 	return 0;
 }
 
-static int test__group3(struct perf_evlist *evlist __used)
+static int test__group3(struct perf_evlist *evlist __maybe_unused)
 {
 	struct perf_evsel *evsel, *leader;
 
@@ -648,7 +648,7 @@
 	return 0;
 }
 
-static int test__group4(struct perf_evlist *evlist __used)
+static int test__group4(struct perf_evlist *evlist __maybe_unused)
 {
 	struct perf_evsel *evsel, *leader;
 
@@ -684,7 +684,7 @@
 	return 0;
 }
 
-static int test__group5(struct perf_evlist *evlist __used)
+static int test__group5(struct perf_evlist *evlist __maybe_unused)
 {
 	struct perf_evsel *evsel, *leader;
 
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index b246303..bf5d033 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -239,8 +239,11 @@
 	return "unknown";
 }
 
-static int add_event(struct list_head **_list, int *idx,
-		     struct perf_event_attr *attr, char *name)
+
+
+static int __add_event(struct list_head **_list, int *idx,
+		       struct perf_event_attr *attr,
+		       char *name, struct cpu_map *cpus)
 {
 	struct perf_evsel *evsel;
 	struct list_head *list = *_list;
@@ -260,6 +263,7 @@
 		return -ENOMEM;
 	}
 
+	evsel->cpus = cpus;
 	if (name)
 		evsel->name = strdup(name);
 	list_add_tail(&evsel->node, list);
@@ -267,6 +271,12 @@
 	return 0;
 }
 
+static int add_event(struct list_head **_list, int *idx,
+		     struct perf_event_attr *attr, char *name)
+{
+	return __add_event(_list, idx, attr, name, NULL);
+}
+
 static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size)
 {
 	int i, j;
@@ -308,7 +318,7 @@
 	for (i = 0; (i < 2) && (op_result[i]); i++) {
 		char *str = op_result[i];
 
-		snprintf(name + n, MAX_NAME_LEN - n, "-%s\n", str);
+		n += snprintf(name + n, MAX_NAME_LEN - n, "-%s", str);
 
 		if (cache_op == -1) {
 			cache_op = parse_aliases(str, perf_evsel__hw_cache_op,
@@ -607,8 +617,8 @@
 	if (perf_pmu__config(pmu, &attr, head_config))
 		return -EINVAL;
 
-	return add_event(list, idx, &attr,
-			 pmu_event_name(head_config));
+	return __add_event(list, idx, &attr, pmu_event_name(head_config),
+			   pmu->cpus);
 }
 
 int parse_events__modifier_group(struct list_head *list,
@@ -807,7 +817,8 @@
 	return ret;
 }
 
-int parse_events(struct perf_evlist *evlist, const char *str, int unset __used)
+int parse_events(struct perf_evlist *evlist, const char *str,
+		 int unset __maybe_unused)
 {
 	struct parse_events_data__events data = {
 		.list = LIST_HEAD_INIT(data.list),
@@ -833,14 +844,14 @@
 }
 
 int parse_events_option(const struct option *opt, const char *str,
-			int unset __used)
+			int unset __maybe_unused)
 {
 	struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
 	return parse_events(evlist, str, unset);
 }
 
 int parse_filter(const struct option *opt, const char *str,
-		 int unset __used)
+		 int unset __maybe_unused)
 {
 	struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
 	struct perf_evsel *last = NULL;
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index f5e28dc..c87efc1 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -207,7 +207,7 @@
 
 %%
 
-int parse_events_wrap(void *scanner __used)
+int parse_events_wrap(void *scanner __maybe_unused)
 {
 	return 1;
 }
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index 42d9a17..cd88209 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -391,7 +391,7 @@
 
 %%
 
-void parse_events_error(void *data __used, void *scanner __used,
-			char const *msg __used)
+void parse_events_error(void *data __maybe_unused, void *scanner __maybe_unused,
+			char const *msg __maybe_unused)
 {
 }
diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c
index 594f8fa..443fc11 100644
--- a/tools/perf/util/parse-options.c
+++ b/tools/perf/util/parse-options.c
@@ -557,7 +557,8 @@
 }
 
 
-int parse_opt_verbosity_cb(const struct option *opt, const char *arg __used,
+int parse_opt_verbosity_cb(const struct option *opt,
+			   const char *arg __maybe_unused,
 			   int unset)
 {
 	int *target = opt->value;
diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h
index 9bd6c4e..316dbe7 100644
--- a/tools/perf/util/perf_regs.h
+++ b/tools/perf/util/perf_regs.h
@@ -6,7 +6,7 @@
 #else
 #define PERF_REGS_MASK	0
 
-static inline const char *perf_reg_name(int id __used)
+static inline const char *perf_reg_name(int id __maybe_unused)
 {
 	return NULL;
 }
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 6631d82..8a2229d 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -9,6 +9,7 @@
 #include "util.h"
 #include "pmu.h"
 #include "parse-events.h"
+#include "cpumap.h"
 
 #define EVENT_SOURCE_DEVICE_PATH "/bus/event_source/devices/"
 
@@ -253,6 +254,33 @@
 	closedir(dir);
 }
 
+static struct cpu_map *pmu_cpumask(char *name)
+{
+	struct stat st;
+	char path[PATH_MAX];
+	const char *sysfs;
+	FILE *file;
+	struct cpu_map *cpus;
+
+	sysfs = sysfs_find_mountpoint();
+	if (!sysfs)
+		return NULL;
+
+	snprintf(path, PATH_MAX,
+		 "%s/bus/event_source/devices/%s/cpumask", sysfs, name);
+
+	if (stat(path, &st) < 0)
+		return NULL;
+
+	file = fopen(path, "r");
+	if (!file)
+		return NULL;
+
+	cpus = cpu_map__read(file);
+	fclose(file);
+	return cpus;
+}
+
 static struct perf_pmu *pmu_lookup(char *name)
 {
 	struct perf_pmu *pmu;
@@ -275,6 +303,8 @@
 	if (!pmu)
 		return NULL;
 
+	pmu->cpus = pmu_cpumask(name);
+
 	pmu_aliases(name, &aliases);
 
 	INIT_LIST_HEAD(&pmu->format);
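
pmu_cpumask() above resolves the sysfs mount point and reads the per-PMU cpumask attribute into a cpu_map, so events on such a PMU (typically uncore units) can default to the right CPUs. A hypothetical standalone check of that sysfs layout; the PMU name uncore_imc_0 is only an example, /sys is assumed as the mount point, and the attribute is simply absent for ordinary core PMUs:

#include <stdio.h>

int main(void)
{
	/* path mirrors the snprintf() in pmu_cpumask() */
	const char *path = "/sys/bus/event_source/devices/uncore_imc_0/cpumask";
	char buf[256];
	FILE *f = fopen(path, "r");

	if (!f) {
		fprintf(stderr, "no cpumask attribute at %s\n", path);
		return 1;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("cpumask: %s", buf);	/* e.g. "0" for an uncore PMU */
	fclose(f);
	return 0;
}
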
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 47f68d3..53c7794 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -28,6 +28,7 @@
 struct perf_pmu {
 	char *name;
 	__u32 type;
+	struct cpu_map *cpus;
 	struct list_head format;
 	struct list_head aliases;
 	struct list_head list;
diff --git a/tools/perf/util/pmu.y b/tools/perf/util/pmu.y
index 20ea77e..ec89804 100644
--- a/tools/perf/util/pmu.y
+++ b/tools/perf/util/pmu.y
@@ -86,8 +86,8 @@
 
 %%
 
-void perf_pmu_error(struct list_head *list __used,
-		    char *name __used,
-		    char const *msg __used)
+void perf_pmu_error(struct list_head *list __maybe_unused,
+		    char *name __maybe_unused,
+		    char const *msg __maybe_unused)
 {
 }
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 0dda25d..4ce04c2 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -41,7 +41,7 @@
 #include "symbol.h"
 #include "thread.h"
 #include "debugfs.h"
-#include "trace-event.h"	/* For __unused */
+#include "trace-event.h"	/* For __maybe_unused */
 #include "probe-event.h"
 #include "probe-finder.h"
 #include "session.h"
@@ -647,8 +647,8 @@
 }
 
 static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
-				struct probe_trace_event **tevs __unused,
-				int max_tevs __unused, const char *target)
+				struct probe_trace_event **tevs __maybe_unused,
+				int max_tevs __maybe_unused, const char *target)
 {
 	if (perf_probe_event_need_dwarf(pev)) {
 		pr_warning("Debuginfo-analysis is not supported.\n");
@@ -661,17 +661,18 @@
 	return 0;
 }
 
-int show_line_range(struct line_range *lr __unused, const char *module __unused)
+int show_line_range(struct line_range *lr __maybe_unused,
+		    const char *module __maybe_unused)
 {
 	pr_warning("Debuginfo-analysis is not supported.\n");
 	return -ENOSYS;
 }
 
-int show_available_vars(struct perf_probe_event *pevs __unused,
-			int npevs __unused, int max_vls __unused,
-			const char *module __unused,
-			struct strfilter *filter __unused,
-			bool externs __unused)
+int show_available_vars(struct perf_probe_event *pevs __maybe_unused,
+			int npevs __maybe_unused, int max_vls __maybe_unused,
+			const char *module __maybe_unused,
+			struct strfilter *filter __maybe_unused,
+			bool externs __maybe_unused)
 {
 	pr_warning("Debuginfo-analysis is not supported.\n");
 	return -ENOSYS;
@@ -2183,7 +2184,7 @@
  * If a symbol corresponds to a function with global binding and
  * matches filter return 0. For all others return 1.
  */
-static int filter_available_functions(struct map *map __unused,
+static int filter_available_functions(struct map *map __maybe_unused,
 				      struct symbol *sym)
 {
 	if (sym->binding == STB_GLOBAL &&
@@ -2307,10 +2308,17 @@
 		function = NULL;
 	}
 	if (!pev->group) {
-		char *ptr1, *ptr2;
+		char *ptr1, *ptr2, *exec_copy;
 
 		pev->group = zalloc(sizeof(char *) * 64);
-		ptr1 = strdup(basename(exec));
+		exec_copy = strdup(exec);
+		if (!exec_copy) {
+			ret = -ENOMEM;
+			pr_warning("Failed to copy exec string.\n");
+			goto out;
+		}
+
+		ptr1 = strdup(basename(exec_copy));
 		if (ptr1) {
 			ptr2 = strpbrk(ptr1, "-._");
 			if (ptr2)
@@ -2319,6 +2327,7 @@
 					ptr1);
 			free(ptr1);
 		}
+		free(exec_copy);
 	}
 	free(pp->function);
 	pp->function = zalloc(sizeof(char *) * MAX_PROBE_ARGS);
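
The strdup() inserted before basename() above matters because POSIX basename() may modify the string it is given, so working on a throwaway copy keeps the original exec path intact. A minimal sketch of the pattern, using a made-up path:

#include <libgen.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	const char *exec = "/usr/local/bin/my-app_1.0";	/* hypothetical */
	char *exec_copy = strdup(exec);
	char *base;

	if (!exec_copy)
		return 1;			/* out of memory */
	base = basename(exec_copy);	/* may modify, or point into, exec_copy */
	printf("%s\n", base);		/* "my-app_1.0"; exec itself untouched */
	free(exec_copy);
	return 0;
}
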
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index d448984..1daf5c1 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -207,7 +207,7 @@
 #else
 /* With older elfutils, this just support kernel module... */
 static int debuginfo__init_online_kernel_dwarf(struct debuginfo *self,
-					       Dwarf_Addr addr __used)
+					       Dwarf_Addr addr __maybe_unused)
 {
 	const char *path = kernel_get_module_path("kernel");
 
@@ -525,8 +525,10 @@
 			return -ENOENT;
 		}
 		/* Verify it is a data structure  */
-		if (dwarf_tag(&type) != DW_TAG_structure_type) {
-			pr_warning("%s is not a data structure.\n", varname);
+		tag = dwarf_tag(&type);
+		if (tag != DW_TAG_structure_type && tag != DW_TAG_union_type) {
+			pr_warning("%s is not a data structure nor a union.\n",
+				   varname);
 			return -EINVAL;
 		}
 
@@ -539,8 +541,9 @@
 			*ref_ptr = ref;
 	} else {
 		/* Verify it is a data structure  */
-		if (tag != DW_TAG_structure_type) {
-			pr_warning("%s is not a data structure.\n", varname);
+		if (tag != DW_TAG_structure_type && tag != DW_TAG_union_type) {
+			pr_warning("%s is not a data structure nor a union.\n",
+				   varname);
 			return -EINVAL;
 		}
 		if (field->name[0] == '[') {
@@ -567,10 +570,15 @@
 	}
 
 	/* Get the offset of the field */
-	ret = die_get_data_member_location(die_mem, &offs);
-	if (ret < 0) {
-		pr_warning("Failed to get the offset of %s.\n", field->name);
-		return ret;
+	if (tag == DW_TAG_union_type) {
+		offs = 0;
+	} else {
+		ret = die_get_data_member_location(die_mem, &offs);
+		if (ret < 0) {
+			pr_warning("Failed to get the offset of %s.\n",
+				   field->name);
+			return ret;
+		}
 	}
 	ref->offset += (long)offs;
 
@@ -1419,7 +1427,7 @@
 }
 
 static int line_range_walk_cb(const char *fname, int lineno,
-			      Dwarf_Addr addr __used,
+			      Dwarf_Addr addr __maybe_unused,
 			      void *data)
 {
 	struct line_finder *lf = data;
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 27187f0..ca85444 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -672,7 +672,7 @@
 };
 
 static int pyrf_evlist__init(struct pyrf_evlist *pevlist,
-			     PyObject *args, PyObject *kwargs __used)
+			     PyObject *args, PyObject *kwargs __maybe_unused)
 {
 	PyObject *pcpus = NULL, *pthreads = NULL;
 	struct cpu_map *cpus;
@@ -733,7 +733,8 @@
 }
 
 static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist,
-					 PyObject *args __used, PyObject *kwargs __used)
+					 PyObject *args __maybe_unused,
+					 PyObject *kwargs __maybe_unused)
 {
 	struct perf_evlist *evlist = &pevlist->evlist;
         PyObject *list = PyList_New(0);
@@ -765,7 +766,8 @@
 
 
 static PyObject *pyrf_evlist__add(struct pyrf_evlist *pevlist,
-				  PyObject *args, PyObject *kwargs __used)
+				  PyObject *args,
+				  PyObject *kwargs __maybe_unused)
 {
 	struct perf_evlist *evlist = &pevlist->evlist;
 	PyObject *pevsel;
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index d280010..ffde3e4 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -25,16 +25,16 @@
 #include <ctype.h>
 #include <errno.h>
 
-#include "../../perf.h"
 #include "../util.h"
+#include <EXTERN.h>
+#include <perl.h>
+
+#include "../../perf.h"
 #include "../thread.h"
 #include "../event.h"
 #include "../trace-event.h"
 #include "../evsel.h"
 
-#include <EXTERN.h>
-#include <perl.h>
-
 void boot_Perf__Trace__Context(pTHX_ CV *cv);
 void boot_DynaLoader(pTHX_ CV *cv);
 typedef PerlInterpreter * INTERP;
@@ -257,10 +257,10 @@
 	return event;
 }
 
-static void perl_process_tracepoint(union perf_event *perf_event __unused,
+static void perl_process_tracepoint(union perf_event *perf_event __maybe_unused,
 				    struct perf_sample *sample,
 				    struct perf_evsel *evsel,
-				    struct machine *machine __unused,
+				    struct machine *machine __maybe_unused,
 				    struct addr_location *al)
 {
 	struct format_field *field;
@@ -349,8 +349,8 @@
 static void perl_process_event_generic(union perf_event *event,
 				       struct perf_sample *sample,
 				       struct perf_evsel *evsel,
-				       struct machine *machine __unused,
-				       struct addr_location *al __unused)
+				       struct machine *machine __maybe_unused,
+				       struct addr_location *al __maybe_unused)
 {
 	dSP;
 
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index afba097..730c663 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -221,10 +221,11 @@
 	return event;
 }
 
-static void python_process_tracepoint(union perf_event *perf_event __unused,
+static void python_process_tracepoint(union perf_event *perf_event
+				      __maybe_unused,
 				 struct perf_sample *sample,
 				 struct perf_evsel *evsel,
-				 struct machine *machine __unused,
+				 struct machine *machine __maybe_unused,
 				 struct addr_location *al)
 {
 	PyObject *handler, *retval, *context, *t, *obj, *dict = NULL;
@@ -339,10 +340,11 @@
 	Py_DECREF(t);
 }
 
-static void python_process_general_event(union perf_event *perf_event __unused,
+static void python_process_general_event(union perf_event *perf_event
+					 __maybe_unused,
 					 struct perf_sample *sample,
 					 struct perf_evsel *evsel,
-					 struct machine *machine __unused,
+					 struct machine *machine __maybe_unused,
 					 struct addr_location *al)
 {
 	PyObject *handler, *retval, *t, *dict;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index f7bb7ae..3049b0a 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -17,6 +17,7 @@
 #include "event-parse.h"
 #include "perf_regs.h"
 #include "unwind.h"
+#include "vdso.h"
 
 static int perf_session__open(struct perf_session *self, bool force)
 {
@@ -211,6 +212,7 @@
 	machine__exit(&self->host_machine);
 	close(self->fd);
 	free(self);
+	vdso__exit();
 }
 
 void machine__remove_thread(struct machine *self, struct thread *th)
@@ -388,55 +390,64 @@
 	      (evsel->attr.sample_type & PERF_SAMPLE_STACK_USER)))
 		return 0;
 
+	/* Bail out if nothing was captured. */
+	if ((!sample->user_regs.regs) ||
+	    (!sample->user_stack.size))
+		return 0;
+
 	return unwind__get_entries(unwind_entry, &callchain_cursor, machine,
 				   thread, evsel->attr.sample_regs_user,
 				   sample);
 
 }
 
-static int process_event_synth_tracing_data_stub(union perf_event *event __used,
-						 struct perf_session *session __used)
+static int process_event_synth_tracing_data_stub(union perf_event *event
+						 __maybe_unused,
+						 struct perf_session *session
+						__maybe_unused)
 {
 	dump_printf(": unhandled!\n");
 	return 0;
 }
 
-static int process_event_synth_attr_stub(union perf_event *event __used,
-					 struct perf_evlist **pevlist __used)
+static int process_event_synth_attr_stub(union perf_event *event __maybe_unused,
+					 struct perf_evlist **pevlist
+					 __maybe_unused)
 {
 	dump_printf(": unhandled!\n");
 	return 0;
 }
 
-static int process_event_sample_stub(struct perf_tool *tool __used,
-				     union perf_event *event __used,
-				     struct perf_sample *sample __used,
-				     struct perf_evsel *evsel __used,
-				     struct machine *machine __used)
+static int process_event_sample_stub(struct perf_tool *tool __maybe_unused,
+				     union perf_event *event __maybe_unused,
+				     struct perf_sample *sample __maybe_unused,
+				     struct perf_evsel *evsel __maybe_unused,
+				     struct machine *machine __maybe_unused)
 {
 	dump_printf(": unhandled!\n");
 	return 0;
 }
 
-static int process_event_stub(struct perf_tool *tool __used,
-			      union perf_event *event __used,
-			      struct perf_sample *sample __used,
-			      struct machine *machine __used)
+static int process_event_stub(struct perf_tool *tool __maybe_unused,
+			      union perf_event *event __maybe_unused,
+			      struct perf_sample *sample __maybe_unused,
+			      struct machine *machine __maybe_unused)
 {
 	dump_printf(": unhandled!\n");
 	return 0;
 }
 
-static int process_finished_round_stub(struct perf_tool *tool __used,
-				       union perf_event *event __used,
-				       struct perf_session *perf_session __used)
+static int process_finished_round_stub(struct perf_tool *tool __maybe_unused,
+				       union perf_event *event __maybe_unused,
+				       struct perf_session *perf_session
+				       __maybe_unused)
 {
 	dump_printf(": unhandled!\n");
 	return 0;
 }
 
-static int process_event_type_stub(struct perf_tool *tool __used,
-				   union perf_event *event __used)
+static int process_event_type_stub(struct perf_tool *tool __maybe_unused,
+				   union perf_event *event __maybe_unused)
 {
 	dump_printf(": unhandled!\n");
 	return 0;
@@ -513,7 +524,7 @@
 }
 
 static void perf_event__all64_swap(union perf_event *event,
-				   bool sample_id_all __used)
+				   bool sample_id_all __maybe_unused)
 {
 	struct perf_event_header *hdr = &event->header;
 	mem_bswap_64(hdr + 1, event->header.size - sizeof(*hdr));
@@ -527,7 +538,7 @@
 	if (sample_id_all) {
 		void *data = &event->comm.comm;
 
-		data += ALIGN(strlen(data) + 1, sizeof(u64));
+		data += PERF_ALIGN(strlen(data) + 1, sizeof(u64));
 		swap_sample_id_all(event, data);
 	}
 }
@@ -544,7 +555,7 @@
 	if (sample_id_all) {
 		void *data = &event->mmap.filename;
 
-		data += ALIGN(strlen(data) + 1, sizeof(u64));
+		data += PERF_ALIGN(strlen(data) + 1, sizeof(u64));
 		swap_sample_id_all(event, data);
 	}
 }
@@ -624,7 +635,7 @@
 }
 
 static void perf_event__hdr_attr_swap(union perf_event *event,
-				      bool sample_id_all __used)
+				      bool sample_id_all __maybe_unused)
 {
 	size_t size;
 
@@ -636,14 +647,14 @@
 }
 
 static void perf_event__event_type_swap(union perf_event *event,
-					bool sample_id_all __used)
+					bool sample_id_all __maybe_unused)
 {
 	event->event_type.event_type.event_id =
 		bswap_64(event->event_type.event_type.event_id);
 }
 
 static void perf_event__tracing_data_swap(union perf_event *event,
-					  bool sample_id_all __used)
+					  bool sample_id_all __maybe_unused)
 {
 	event->tracing_data.size = bswap_32(event->tracing_data.size);
 }
@@ -692,7 +703,7 @@
 				      struct perf_tool *tool,
 				      u64 file_offset);
 
-static void flush_sample_queue(struct perf_session *s,
+static int flush_sample_queue(struct perf_session *s,
 			       struct perf_tool *tool)
 {
 	struct ordered_samples *os = &s->ordered_samples;
@@ -705,7 +716,7 @@
 	int ret;
 
 	if (!tool->ordered_samples || !limit)
-		return;
+		return 0;
 
 	list_for_each_entry_safe(iter, tmp, head, list) {
 		if (iter->timestamp > limit)
@@ -715,9 +726,12 @@
 						s->header.needs_swap);
 		if (ret)
 			pr_err("Can't parse sample, err = %d\n", ret);
-		else
-			perf_session_deliver_event(s, iter->event, &sample, tool,
-						   iter->file_offset);
+		else {
+			ret = perf_session_deliver_event(s, iter->event, &sample, tool,
+							 iter->file_offset);
+			if (ret)
+				return ret;
+		}
 
 		os->last_flush = iter->timestamp;
 		list_del(&iter->list);
@@ -737,6 +751,8 @@
 	}
 
 	os->nr_samples = 0;
+
+	return 0;
 }
 
 /*
@@ -779,13 +795,14 @@
  *      etc...
  */
 static int process_finished_round(struct perf_tool *tool,
-				  union perf_event *event __used,
+				  union perf_event *event __maybe_unused,
 				  struct perf_session *session)
 {
-	flush_sample_queue(session, tool);
-	session->ordered_samples.next_flush = session->ordered_samples.max_timestamp;
+	int ret = flush_sample_queue(session, tool);
+	if (!ret)
+		session->ordered_samples.next_flush = session->ordered_samples.max_timestamp;
 
-	return 0;
+	return ret;
 }
 
 /* The queue is ordered by time */
@@ -1443,7 +1460,7 @@
 	err = 0;
 	/* do the final flush for ordered samples */
 	session->ordered_samples.next_flush = ULLONG_MAX;
-	flush_sample_queue(session, tool);
+	err = flush_sample_queue(session, tool);
 out_err:
 	perf_session__warn_about_errors(session, tool);
 	perf_session_free_sample_buffers(session);
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 176a609..aab414f 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -36,9 +36,7 @@
 	struct pevent		*pevent;
 	/*
 	 * FIXME: Need to split this up further, we need global
-	 *	  stats + per event stats. 'perf diff' also needs
-	 *	  to properly support multiple events in a single
-	 *	  perf.data file.
+	 *	  stats + per event stats.
 	 */
 	struct hists		hists;
 	int			fd;
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 0f5a0a4..b5b1b92 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -8,12 +8,11 @@
 const char	*sort_order = default_sort_order;
 int		sort__need_collapse = 0;
 int		sort__has_parent = 0;
+int		sort__has_sym = 0;
 int		sort__branch_mode = -1; /* -1 = means not set */
 
 enum sort_type	sort__first_dimension;
 
-char * field_sep;
-
 LIST_HEAD(hist_entry__sort_list);
 
 static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...)
@@ -23,11 +22,11 @@
 
 	va_start(ap, fmt);
 	n = vsnprintf(bf, size, fmt, ap);
-	if (field_sep && n > 0) {
+	if (symbol_conf.field_sep && n > 0) {
 		char *sep = bf;
 
 		while (1) {
-			sep = strchr(sep, *field_sep);
+			sep = strchr(sep, *symbol_conf.field_sep);
 			if (sep == NULL)
 				break;
 			*sep = '.';
@@ -172,7 +171,7 @@
 
 static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym,
 				     u64 ip, char level, char *bf, size_t size,
-				     unsigned int width __used)
+				     unsigned int width __maybe_unused)
 {
 	size_t ret = 0;
 
@@ -207,7 +206,8 @@
 };
 
 static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
-				    size_t size, unsigned int width __used)
+				    size_t size,
+				    unsigned int width __maybe_unused)
 {
 	return _hist_entry__sym_snprintf(self->ms.map, self->ms.sym, self->ip,
 					 self->level, bf, size, width);
@@ -250,7 +250,8 @@
 }
 
 static int hist_entry__srcline_snprintf(struct hist_entry *self, char *bf,
-				   size_t size, unsigned int width __used)
+					size_t size,
+					unsigned int width __maybe_unused)
 {
 	FILE *fp;
 	char cmd[PATH_MAX + 2], *path = self->srcline, *nl;
@@ -399,7 +400,8 @@
 }
 
 static int hist_entry__sym_from_snprintf(struct hist_entry *self, char *bf,
-				    size_t size, unsigned int width __used)
+					size_t size,
+					unsigned int width __maybe_unused)
 {
 	struct addr_map_symbol *from = &self->branch_info->from;
 	return _hist_entry__sym_snprintf(from->map, from->sym, from->addr,
@@ -408,7 +410,8 @@
 }
 
 static int hist_entry__sym_to_snprintf(struct hist_entry *self, char *bf,
-				    size_t size, unsigned int width __used)
+				       size_t size,
+				       unsigned int width __maybe_unused)
 {
 	struct addr_map_symbol *to = &self->branch_info->to;
 	return _hist_entry__sym_snprintf(to->map, to->sym, to->addr,
@@ -509,6 +512,10 @@
 				return -EINVAL;
 			}
 			sort__has_parent = 1;
+		} else if (sd->entry == &sort_sym ||
+			   sd->entry == &sort_sym_from ||
+			   sd->entry == &sort_sym_to) {
+			sort__has_sym = 1;
 		}
 
 		if (sd->taken)
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index e724b26..12d6347 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -31,8 +31,8 @@
 extern const char default_sort_order[];
 extern int sort__need_collapse;
 extern int sort__has_parent;
+extern int sort__has_sym;
 extern int sort__branch_mode;
-extern char *field_sep;
 extern struct sort_entry sort_comm;
 extern struct sort_entry sort_dso;
 extern struct sort_entry sort_sym;
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
new file mode 100644
index 0000000..2374212
--- /dev/null
+++ b/tools/perf/util/stat.c
@@ -0,0 +1,57 @@
+#include <math.h>
+
+#include "stat.h"
+
+void update_stats(struct stats *stats, u64 val)
+{
+	double delta;
+
+	stats->n++;
+	delta = val - stats->mean;
+	stats->mean += delta / stats->n;
+	stats->M2 += delta*(val - stats->mean);
+}
+
+double avg_stats(struct stats *stats)
+{
+	return stats->mean;
+}
+
+/*
+ * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
+ *
+ *       (\Sum n_i^2) - ((\Sum n_i)^2)/n
+ * s^2 = -------------------------------
+ *                  n - 1
+ *
+ * http://en.wikipedia.org/wiki/Stddev
+ *
+ * The std dev of the mean is related to the std dev by:
+ *
+ *             s
+ * s_mean = -------
+ *          sqrt(n)
+ *
+ */
+double stddev_stats(struct stats *stats)
+{
+	double variance, variance_mean;
+
+	if (!stats->n)
+		return 0.0;
+
+	variance = stats->M2 / (stats->n - 1);
+	variance_mean = variance / stats->n;
+
+	return sqrt(variance_mean);
+}
+
+double rel_stddev_stats(double stddev, double avg)
+{
+	double pct = 0.0;
+
+	if (avg)
+		pct = 100.0 * stddev/avg;
+
+	return pct;
+}
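
The new stat.c implements Welford's online algorithm: update_stats() folds one value at a time into a running mean and M2 (the sum of squared deviations), which keeps the variance numerically stable without storing the samples. A small usage sketch, assuming it is compiled in the perf tree and linked against stat.c, with u64 coming from the types header that stat.h includes:

#include <stdio.h>
#include "stat.h"

int main(void)
{
	struct stats st = { .n = 0, .mean = 0, .M2 = 0 };
	u64 runs[] = { 100, 102, 98, 101, 99 };
	int i;

	for (i = 0; i < 5; i++)
		update_stats(&st, runs[i]);

	printf("%.2f +- %.3f (+- %.2f%%)\n",
	       avg_stats(&st), stddev_stats(&st),
	       rel_stddev_stats(stddev_stats(&st), avg_stats(&st)));
	return 0;
}
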
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
new file mode 100644
index 0000000..588367c
--- /dev/null
+++ b/tools/perf/util/stat.h
@@ -0,0 +1,16 @@
+#ifndef __PERF_STATS_H
+#define __PERF_STATS_H
+
+#include "types.h"
+
+struct stats
+{
+	double n, mean, M2;
+};
+
+void update_stats(struct stats *stats, u64 val);
+double avg_stats(struct stats *stats);
+double stddev_stats(struct stats *stats);
+double rel_stddev_stats(double stddev, double avg);
+
+#endif
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
index 199bc4d..3217059 100644
--- a/tools/perf/util/string.c
+++ b/tools/perf/util/string.c
@@ -1,5 +1,5 @@
 #include "util.h"
-#include "string.h"
+#include "linux/string.h"
 
 #define K 1024LL
 /*
@@ -335,3 +335,19 @@
 
 	return s;
 }
+
+/**
+ * memdup - duplicate region of memory
+ * @src: memory region to duplicate
+ * @len: memory region length
+ */
+void *memdup(const void *src, size_t len)
+{
+	void *p;
+
+	p = malloc(len);
+	if (p)
+		memcpy(p, src, len);
+
+	return p;
+}
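
memdup() is the malloc-and-copy helper declared in the new linux/string.h stub; it returns NULL on allocation failure and the caller owns, and must free, the duplicate. A minimal usage sketch, assuming it is linked against util/string.c:

#include <stdio.h>
#include <stdlib.h>

void *memdup(const void *src, size_t len);	/* from util/string.c above */

int main(void)
{
	int src[] = { 1, 2, 3, 4 };
	int *copy = memdup(src, sizeof(src));

	if (!copy)
		return 1;		/* malloc() failed inside memdup() */
	printf("%d\n", copy[3]);	/* prints 4 */
	free(copy);			/* caller releases the duplicate */
	return 0;
}
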
diff --git a/tools/perf/util/strlist.c b/tools/perf/util/strlist.c
index 95856ff..155d8b7 100644
--- a/tools/perf/util/strlist.c
+++ b/tools/perf/util/strlist.c
@@ -93,7 +93,7 @@
 
 void strlist__remove(struct strlist *slist, struct str_node *snode)
 {
-	str_node__delete(snode, slist->dupstr);
+	rblist__remove_node(&slist->rblist, &snode->rb_node);
 }
 
 struct str_node *strlist__find(struct strlist *slist, const char *entry)
diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c
index 6738ea1..259f8f2 100644
--- a/tools/perf/util/symbol-minimal.c
+++ b/tools/perf/util/symbol-minimal.c
@@ -69,8 +69,9 @@
 	return -1;
 }
 
-int filename__read_debuglink(const char *filename __used,
-			     char *debuglink __used, size_t size __used)
+int filename__read_debuglink(const char *filename __maybe_unused,
+			     char *debuglink __maybe_unused,
+			     size_t size __maybe_unused)
 {
 	return -1;
 }
@@ -241,7 +242,8 @@
 	return ret;
 }
 
-int symsrc__init(struct symsrc *ss, struct dso *dso __used, const char *name,
+int symsrc__init(struct symsrc *ss, struct dso *dso __maybe_unused,
+		 const char *name,
 	         enum dso_binary_type type)
 {
 	int fd = open(name, O_RDONLY);
@@ -260,13 +262,13 @@
 	return -1;
 }
 
-bool symsrc__possibly_runtime(struct symsrc *ss __used)
+bool symsrc__possibly_runtime(struct symsrc *ss __maybe_unused)
 {
 	/* Assume all sym sources could be a runtime image. */
 	return true;
 }
 
-bool symsrc__has_symtab(struct symsrc *ss __used)
+bool symsrc__has_symtab(struct symsrc *ss __maybe_unused)
 {
 	return false;
 }
@@ -277,17 +279,19 @@
 	close(ss->fd);
 }
 
-int dso__synthesize_plt_symbols(struct dso *dso __used,
-				struct symsrc *ss __used,
-				struct map *map __used,
-				symbol_filter_t filter __used)
+int dso__synthesize_plt_symbols(struct dso *dso __maybe_unused,
+				struct symsrc *ss __maybe_unused,
+				struct map *map __maybe_unused,
+				symbol_filter_t filter __maybe_unused)
 {
 	return 0;
 }
 
-int dso__load_sym(struct dso *dso, struct map *map __used, struct symsrc *ss,
-		  struct symsrc *runtime_ss __used,
-		  symbol_filter_t filter __used, int kmodule __used)
+int dso__load_sym(struct dso *dso, struct map *map __maybe_unused,
+		  struct symsrc *ss,
+		  struct symsrc *runtime_ss __maybe_unused,
+		  symbol_filter_t filter __maybe_unused,
+		  int kmodule __maybe_unused)
 {
 	unsigned char *build_id[BUILD_ID_SIZE];
 
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 753699a..e2e8c69 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1596,7 +1596,7 @@
 	list_add_tail(&dso->node, head);
 }
 
-static struct dso *dsos__find(struct list_head *head, const char *name)
+struct dso *dsos__find(struct list_head *head, const char *name)
 {
 	struct dso *pos;
 
@@ -1755,7 +1755,7 @@
 };
 
 static int symbol__in_kernel(void *arg, const char *name,
-			     char type __used, u64 start)
+			     char type __maybe_unused, u64 start)
 {
 	struct process_args *args = arg;
 
@@ -1991,7 +1991,7 @@
 	if (symbol_conf.initialized)
 		return 0;
 
-	symbol_conf.priv_size = ALIGN(symbol_conf.priv_size, sizeof(u64));
+	symbol_conf.priv_size = PERF_ALIGN(symbol_conf.priv_size, sizeof(u64));
 
 	symbol__elf_init();
 
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index fc4b1e6..4ff45e3 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -10,6 +10,7 @@
 #include <linux/rbtree.h>
 #include <stdio.h>
 #include <byteswap.h>
+#include <libgen.h>
 
 #ifndef NO_LIBELF_SUPPORT
 #include <libelf.h>
@@ -20,14 +21,15 @@
 #ifdef HAVE_CPLUS_DEMANGLE
 extern char *cplus_demangle(const char *, int);
 
-static inline char *bfd_demangle(void __used *v, const char *c, int i)
+static inline char *bfd_demangle(void __maybe_unused *v, const char *c, int i)
 {
 	return cplus_demangle(c, i);
 }
 #else
 #ifdef NO_DEMANGLE
-static inline char *bfd_demangle(void __used *v, const char __used *c,
-				 int __used i)
+static inline char *bfd_demangle(void __maybe_unused *v,
+				 const char __maybe_unused *c,
+				 int __maybe_unused i)
 {
 	return NULL;
 }
@@ -293,6 +295,7 @@
 void dso__sort_by_name(struct dso *dso, enum map_type type);
 
 void dsos__add(struct list_head *head, struct dso *dso);
+struct dso *dsos__find(struct list_head *head, const char *name);
 struct dso *__dsos__findnew(struct list_head *head, const char *name);
 
 int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter);
diff --git a/tools/perf/util/target.c b/tools/perf/util/target.c
index 051eaa6..065528b 100644
--- a/tools/perf/util/target.c
+++ b/tools/perf/util/target.c
@@ -117,8 +117,8 @@
 
 		if (err != buf) {
 			size_t len = strlen(err);
-			char *c = mempcpy(buf, err, min(buflen - 1, len));
-			*c = '\0';
+			memcpy(buf, err, min(buflen - 1, len));
+			*(buf + min(buflen - 1, len)) = '\0';
 		}
 
 		return 0;
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h
index 33347ca..86ff1b1 100644
--- a/tools/perf/util/top.h
+++ b/tools/perf/util/top.h
@@ -5,6 +5,7 @@
 #include "types.h"
 #include <stddef.h>
 #include <stdbool.h>
+#include <termios.h>
 
 struct perf_evlist;
 struct perf_evsel;
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index a5a554e..aa4c860 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -221,7 +221,7 @@
 }
 
 void parse_proc_kallsyms(struct pevent *pevent,
-			 char *file, unsigned int size __unused)
+			 char *file, unsigned int size __maybe_unused)
 {
 	unsigned long long addr;
 	char *func;
@@ -253,7 +253,7 @@
 }
 
 void parse_ftrace_printk(struct pevent *pevent,
-			 char *file, unsigned int size __unused)
+			 char *file, unsigned int size __maybe_unused)
 {
 	unsigned long long addr;
 	char *printk;
diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c
index 302ff26..8715a10 100644
--- a/tools/perf/util/trace-event-scripting.c
+++ b/tools/perf/util/trace-event-scripting.c
@@ -35,11 +35,11 @@
 	return 0;
 }
 
-static void process_event_unsupported(union perf_event *event __unused,
-				      struct perf_sample *sample __unused,
-				      struct perf_evsel *evsel __unused,
-				      struct machine *machine __unused,
-				      struct addr_location *al __unused)
+static void process_event_unsupported(union perf_event *event __maybe_unused,
+				      struct perf_sample *sample __maybe_unused,
+				      struct perf_evsel *evsel __maybe_unused,
+				      struct machine *machine __maybe_unused,
+				      struct addr_location *al __maybe_unused)
 {
 }
 
@@ -52,17 +52,19 @@
 		"\n  etc.\n");
 }
 
-static int python_start_script_unsupported(const char *script __unused,
-					   int argc __unused,
-					   const char **argv __unused)
+static int python_start_script_unsupported(const char *script __maybe_unused,
+					   int argc __maybe_unused,
+					   const char **argv __maybe_unused)
 {
 	print_python_unsupported_msg();
 
 	return -1;
 }
 
-static int python_generate_script_unsupported(struct pevent *pevent __unused,
-					      const char *outfile __unused)
+static int python_generate_script_unsupported(struct pevent *pevent
+					      __maybe_unused,
+					      const char *outfile
+					      __maybe_unused)
 {
 	print_python_unsupported_msg();
 
@@ -114,17 +116,18 @@
 		"\n  etc.\n");
 }
 
-static int perl_start_script_unsupported(const char *script __unused,
-					 int argc __unused,
-					 const char **argv __unused)
+static int perl_start_script_unsupported(const char *script __maybe_unused,
+					 int argc __maybe_unused,
+					 const char **argv __maybe_unused)
 {
 	print_perl_unsupported_msg();
 
 	return -1;
 }
 
-static int perl_generate_script_unsupported(struct pevent *pevent __unused,
-					    const char *outfile __unused)
+static int perl_generate_script_unsupported(struct pevent *pevent
+					    __maybe_unused,
+					    const char *outfile __maybe_unused)
 {
 	print_perl_unsupported_msg();
 
diff --git a/tools/perf/util/unwind.c b/tools/perf/util/unwind.c
index 00a42aa..958723b 100644
--- a/tools/perf/util/unwind.c
+++ b/tools/perf/util/unwind.c
@@ -307,32 +307,36 @@
 					 need_unwind_info, arg);
 }
 
-static int access_fpreg(unw_addr_space_t __used as, unw_regnum_t __used num,
-			unw_fpreg_t __used *val, int __used __write,
-			void __used *arg)
+static int access_fpreg(unw_addr_space_t __maybe_unused as,
+			unw_regnum_t __maybe_unused num,
+			unw_fpreg_t __maybe_unused *val,
+			int __maybe_unused __write,
+			void __maybe_unused *arg)
 {
 	pr_err("unwind: access_fpreg unsupported\n");
 	return -UNW_EINVAL;
 }
 
-static int get_dyn_info_list_addr(unw_addr_space_t __used as,
-				  unw_word_t __used *dil_addr,
-				  void __used *arg)
+static int get_dyn_info_list_addr(unw_addr_space_t __maybe_unused as,
+				  unw_word_t __maybe_unused *dil_addr,
+				  void __maybe_unused *arg)
 {
 	return -UNW_ENOINFO;
 }
 
-static int resume(unw_addr_space_t __used as, unw_cursor_t __used *cu,
-		  void __used *arg)
+static int resume(unw_addr_space_t __maybe_unused as,
+		  unw_cursor_t __maybe_unused *cu,
+		  void __maybe_unused *arg)
 {
 	pr_err("unwind: resume unsupported\n");
 	return -UNW_EINVAL;
 }
 
 static int
-get_proc_name(unw_addr_space_t __used as, unw_word_t __used addr,
-		char __used *bufp, size_t __used buf_len,
-		unw_word_t __used *offp, void __used *arg)
+get_proc_name(unw_addr_space_t __maybe_unused as,
+	      unw_word_t __maybe_unused addr,
+		char __maybe_unused *bufp, size_t __maybe_unused buf_len,
+		unw_word_t __maybe_unused *offp, void __maybe_unused *arg)
 {
 	pr_err("unwind: get_proc_name unsupported\n");
 	return -UNW_EINVAL;
@@ -377,7 +381,7 @@
 	return 0;
 }
 
-static int access_mem(unw_addr_space_t __used as,
+static int access_mem(unw_addr_space_t __maybe_unused as,
 		      unw_word_t addr, unw_word_t *valp,
 		      int __write, void *arg)
 {
@@ -422,7 +426,7 @@
 	return 0;
 }
 
-static int access_reg(unw_addr_space_t __used as,
+static int access_reg(unw_addr_space_t __maybe_unused as,
 		      unw_regnum_t regnum, unw_word_t *valp,
 		      int __write, void *arg)
 {
@@ -454,9 +458,9 @@
 	return 0;
 }
 
-static void put_unwind_info(unw_addr_space_t __used as,
-			    unw_proc_info_t *pi __used,
-			    void *arg __used)
+static void put_unwind_info(unw_addr_space_t __maybe_unused as,
+			    unw_proc_info_t *pi __maybe_unused,
+			    void *arg __maybe_unused)
 {
 	pr_debug("unwind: put_unwind_info called\n");
 }
diff --git a/tools/perf/util/unwind.h b/tools/perf/util/unwind.h
index 919bd6a..a78c8b3 100644
--- a/tools/perf/util/unwind.h
+++ b/tools/perf/util/unwind.h
@@ -22,11 +22,12 @@
 int unwind__arch_reg_id(int regnum);
 #else
 static inline int
-unwind__get_entries(unwind_entry_cb_t cb __used, void *arg __used,
-		    struct machine *machine __used,
-		    struct thread *thread __used,
-		    u64 sample_uregs __used,
-		    struct perf_sample *data __used)
+unwind__get_entries(unwind_entry_cb_t cb __maybe_unused,
+		    void *arg __maybe_unused,
+		    struct machine *machine __maybe_unused,
+		    struct thread *thread __maybe_unused,
+		    u64 sample_uregs __maybe_unused,
+		    struct perf_sample *data __maybe_unused)
 {
 	return 0;
 }
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 1b8775c..2055cf3 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -1,7 +1,9 @@
 #include "../perf.h"
 #include "util.h"
 #include <sys/mman.h>
+#ifndef NO_BACKTRACE
 #include <execinfo.h>
+#endif
 #include <stdio.h>
 #include <stdlib.h>
 
@@ -163,6 +165,7 @@
 }
 
 /* Obtain a backtrace and print it to stdout. */
+#ifndef NO_BACKTRACE
 void dump_stack(void)
 {
 	void *array[16];
@@ -177,3 +180,6 @@
 
 	free(strings);
 }
+#else
+void dump_stack(void) {}
+#endif
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 00a93a9..70fa70b 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -69,13 +69,8 @@
 #include <sys/poll.h>
 #include <sys/socket.h>
 #include <sys/ioctl.h>
-#include <sys/select.h>
-#include <netinet/in.h>
-#include <netinet/tcp.h>
-#include <arpa/inet.h>
-#include <netdb.h>
 #include <inttypes.h>
-#include "../../../include/linux/magic.h"
+#include <linux/magic.h>
 #include "types.h"
 #include <sys/ttydefaults.h>
 
diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c
new file mode 100644
index 0000000..e60951f
--- /dev/null
+++ b/tools/perf/util/vdso.c
@@ -0,0 +1,111 @@
+
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <linux/kernel.h>
+
+#include "vdso.h"
+#include "util.h"
+#include "symbol.h"
+#include "linux/string.h"
+
+static bool vdso_found;
+static char vdso_file[] = "/tmp/perf-vdso.so-XXXXXX";
+
+static int find_vdso_map(void **start, void **end)
+{
+	FILE *maps;
+	char line[128];
+	int found = 0;
+
+	maps = fopen("/proc/self/maps", "r");
+	if (!maps) {
+		pr_err("vdso: cannot open maps\n");
+		return -1;
+	}
+
+	while (!found && fgets(line, sizeof(line), maps)) {
+		int m = -1;
+
+		/* We care only about private r-x mappings. */
+		if (2 != sscanf(line, "%p-%p r-xp %*x %*x:%*x %*u %n",
+				start, end, &m))
+			continue;
+		if (m < 0)
+			continue;
+
+		if (!strncmp(&line[m], VDSO__MAP_NAME,
+			     sizeof(VDSO__MAP_NAME) - 1))
+			found = 1;
+	}
+
+	fclose(maps);
+	return !found;
+}
+
+static char *get_file(void)
+{
+	char *vdso = NULL;
+	char *buf = NULL;
+	void *start, *end;
+	size_t size;
+	int fd;
+
+	if (vdso_found)
+		return vdso_file;
+
+	if (find_vdso_map(&start, &end))
+		return NULL;
+
+	size = end - start;
+
+	buf = memdup(start, size);
+	if (!buf)
+		return NULL;
+
+	fd = mkstemp(vdso_file);
+	if (fd < 0)
+		goto out;
+
+	if (size == (size_t) write(fd, buf, size))
+		vdso = vdso_file;
+
+	close(fd);
+
+ out:
+	free(buf);
+
+	vdso_found = (vdso != NULL);
+	return vdso;
+}
+
+void vdso__exit(void)
+{
+	if (vdso_found)
+		unlink(vdso_file);
+}
+
+struct dso *vdso__dso_findnew(struct list_head *head)
+{
+	struct dso *dso = dsos__find(head, VDSO__MAP_NAME);
+
+	if (!dso) {
+		char *file;
+
+		file = get_file();
+		if (!file)
+			return NULL;
+
+		dso = dso__new(VDSO__MAP_NAME);
+		if (dso != NULL) {
+			dsos__add(head, dso);
+			dso__set_long_name(dso, file);
+		}
+	}
+
+	return dso;
+}
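
find_vdso_map() above locates the [vdso] mapping by scanning /proc/self/maps; the %n conversion records how many characters sscanf() consumed, so the map name can be compared right after the parsed fields. A hypothetical standalone demo of the same parsing, printing where the current process has its vDSO mapped:

#include <stdio.h>
#include <string.h>

int main(void)
{
	FILE *maps = fopen("/proc/self/maps", "r");
	char line[128];
	void *start, *end;

	if (!maps)
		return 1;

	while (fgets(line, sizeof(line), maps)) {
		int m = -1;

		/* only private r-xp mappings, as in find_vdso_map() */
		if (sscanf(line, "%p-%p r-xp %*x %*x:%*x %*u %n",
			   &start, &end, &m) != 2 || m < 0)
			continue;
		if (!strncmp(&line[m], "[vdso]", 6))
			printf("[vdso] at %p-%p\n", start, end);
	}
	fclose(maps);
	return 0;
}
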
diff --git a/tools/perf/util/vdso.h b/tools/perf/util/vdso.h
new file mode 100644
index 0000000..0f76e7c
--- /dev/null
+++ b/tools/perf/util/vdso.h
@@ -0,0 +1,18 @@
+#ifndef __PERF_VDSO__
+#define __PERF_VDSO__
+
+#include <linux/types.h>
+#include <string.h>
+#include <stdbool.h>
+
+#define VDSO__MAP_NAME "[vdso]"
+
+static inline bool is_vdso_map(const char *filename)
+{
+	return !strcmp(filename, VDSO__MAP_NAME);
+}
+
+struct dso *vdso__dso_findnew(struct list_head *head);
+void vdso__exit(void);
+
+#endif /* __PERF_VDSO__ */
diff --git a/tools/perf/util/wrapper.c b/tools/perf/util/wrapper.c
index 73e900e..19f15b6 100644
--- a/tools/perf/util/wrapper.c
+++ b/tools/perf/util/wrapper.c
@@ -7,7 +7,8 @@
  * There's no pack memory to release - but stay close to the Git
  * version so wrap this away:
  */
-static inline void release_pack_memory(size_t size __used, int flag __used)
+static inline void release_pack_memory(size_t size __maybe_unused,
+				       int flag __maybe_unused)
 {
 }
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2468523..d617f69 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1976,9 +1976,10 @@
 			if (copy_from_user(&csigset, sigmask_arg->sigset,
 					   sizeof csigset))
 				goto out;
-		}
-		sigset_from_compat(&sigset, &csigset);
-		r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset);
+			sigset_from_compat(&sigset, &csigset);
+			r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset);
+		} else
+			r = kvm_vcpu_ioctl_set_sigmask(vcpu, NULL);
 		break;
 	}
 	default: